def empty(shape):
    """Allocate an uninitialized CUDAMatrix of the given (rows, cols) shape."""
    raw = cudamat.cudamat()
    status = _cudamat.init_empty(ct.pointer(raw),
                                 ct.c_int(shape[0]),
                                 ct.c_int(shape[1]))
    # Non-zero status codes from the C layer are turned into Python exceptions.
    if status:
        raise generate_exception(status)
    return CUDAMatrix(raw)
def gpuarray_to_garray(x):
    """Create a Gnumpy garray that uses the same device memory as PyCUDA GPUArray x."""
    # mostly adapted from Theano
    assert isinstance(x, gpuarray.GPUArray), "x must be a PyCUDA GPUArray"
    assert x.dtype == np.float32, "x must be of data type float32"

    # Verify x is laid out in contiguous row-major (C) order, walking the
    # dimensions from innermost to outermost. Dims of extent 1 are skipped
    # because their stride is irrelevant. 4 == sizeof(float32) in bytes.
    expected = 1
    contiguous = True
    for axis in range(len(x.shape) - 1, -1, -1):
        if x.shape[axis] == 1:
            continue
        if x.strides[axis] != expected * 4:
            contiguous = False
            break
        expected *= x.shape[axis]
    assert contiguous, "x must be in continous row-major order"

    # Build a raw cudamat struct (no host data) pointing at x's device buffer.
    # The matrix is shaped as a single column holding all elements.
    raw = cudamat.cudamat()
    raw.size[0] = reduce(lambda a, b: a * b, x.shape, 1)
    raw.size[1] = 1
    raw.data_device = ctypes.cast(x.ptr, ctypes.POINTER(ctypes.c_float))
    raw.on_host = 0
    raw.on_device = 1
    raw.is_trans = 0
    raw.owns_data = 0  # x owns the device memory, so x will free it

    # Wrap the struct in a CUDAMatrix.
    px = cudamat.CUDAMatrix(raw)
    px._base = x  # keep x alive for as long as the cudamat object lives
    px.mat_on_host = False  # tell cudamat there is no numpy array attached

    # Finally wrap the CUDAMatrix in a garray of x's original shape.
    return gnumpy.garray(px, x.shape, ___const_garray)
def test_nothing(): print "nothing done" cm_mat = cudamat.cudamat() cm_mat.size[0] = 1 cm_mat.size[1] = 1 #cm_mat.data_device = ctypes.cast(x.ptr, ctypes.POINTER(ctypes.c_float)) cm_mat.on_host = 0 cm_mat.on_device = 1 cm_mat.is_trans = 0 cm_mat.owns_data = 0 # note: cm_mat dosen't owe the data; x does. So x will delete it. # create CUDAMatrix px = cudamat.CUDAMatrix(cm_mat) px._base = None # x won't be freed if the cudamat object isn't freed. px.mat_on_host = False # let cudamat know that we don't have a numpy # array attached. # create garray ans = gnumpy.garray(px, [3,2], ___const_garray)
def cudandarray_to_garray(x, copyif=False):
    """
    take a CudaNdarray and return a gnumpy.garray object.

    :type x: CudaNdarray
    :param x: The array to transform to gnumpy.garray.
    :type copyif: bool
    :param copyif: If False, raise an error if x is not c contiguous.
                   If it is c contiguous, the returned garray shares x's
                   device memory. If True, a non-contiguous x is copied
                   first (so the result does not share memory); a
                   contiguous x still shares memory. This is needed
                   because GPUArray doesn't fully support strided memory.

    :return type: gnumpy.garray
    """
    if not isinstance(x, cuda.CudaNdarray):
        raise ValueError("We can transfer only CudaNdarray to cudamat.CUDAMatrix")

    # C-contiguity check: walk dims innermost-first; dims of extent 1 are
    # skipped since their stride never matters.
    expected = 1
    contiguous = True
    for axis in range(x.ndim - 1, -1, -1):
        if x.shape[axis] == 1:
            continue
        if x._strides[axis] != expected:
            contiguous = False
            break
        expected *= x.shape[axis]

    if not contiguous:
        if not copyif:
            raise ValueError("We where asked to don't copy memory, but the memory is not c contiguous.")
        x = x.copy()

    # x is now guaranteed c contiguous. Build a raw cudamat struct (no host
    # data) shaped as one column holding every element.
    raw = cudamat.cudamat()
    raw.size[0] = reduce(lambda a, b: a * b, x.shape, 1)
    raw.size[1] = 1
    raw.on_host = 0
    raw.on_device = 1
    raw.is_trans = 0
    raw.owns_data = 0  # x owns the device memory and will free it

    # x.gpudata is a long; cast it to a float pointer.
    import ctypes
    raw.data_device = ctypes.cast(x.gpudata, ctypes.POINTER(ctypes.c_float))

    px = cudamat.CUDAMatrix(raw)
    px._base = x  # keep x alive while the cudamat object is alive
    px.mat_on_host = False  # tell cudamat there is no numpy array attached

    # gnumpy normally parks cudamat objects in its _cmsReuseCache when a
    # garray is deleted, which would keep theano-provided memory alive
    # forever. Marking the garray as a view/alias prevents that, so the
    # cudamat object (and thus x) can actually be freed.
    return gnumpy.garray(px, x.shape, ___const_garray)
def cudandarray_to_cudamat(x, copyif=False):
    """
    take a CudaNdarray and return a cudamat.CUDAMatrix object.

    :type x: CudaNdarray
    :param x: The 2-d array to transform to cudamat.CUDAMatrix.
    :type copyif: bool
    :param copyif: If False, raise an error if x is not c contiguous.
                   If it is c contiguous, the result shares x's device
                   memory. If True, a non-contiguous x is copied first
                   (so the result does not share memory); a contiguous x
                   still shares memory. This is needed because GPUArray
                   doesn't fully support strided memory.

    :return type: cudamat.CUDAMatrix
    :raises ValueError: if x is not a CudaNdarray, or is not c contiguous
        and copyif is False.
    :raises TypeError: if x is not 2-dimensional.
    """
    if not isinstance(x, cuda.CudaNdarray):
        raise ValueError(
            "We can transfer only CudaNdarray to cudamat.CUDAMatrix")
    elif x.ndim != 2:
        # Fix: the %s placeholder was never interpolated, so the error
        # message used to print a literal "%s" instead of the dim count.
        raise TypeError(
            "cudandarray_to_cudamat: input must be 2-d (has %s dims). That's "
            "because cudamat arrays are always 2-dimensional" % x.ndim)
    else:
        # C-contiguity check: walk dims innermost-first; dims of extent 1
        # are skipped since their stride never matters.
        size = 1
        c_contiguous = True
        for i in range(x.ndim - 1, -1, -1):
            if x.shape[i] == 1:
                continue
            if x._strides[i] != size:
                c_contiguous = False
                break
            size *= x.shape[i]
        if not c_contiguous:
            if copyif:
                x = x.copy()
            else:
                raise ValueError(
                    "We where asked to don't copy memory, but the memory is not c contiguous."
                )

        # Now x is always c contiguous. Create a cudamat struct with no
        # host data, mirroring x's 2-d shape.
        cm_mat = cudamat.cudamat()
        cm_mat.size[0] = x.shape[0]
        cm_mat.size[1] = x.shape[1]
        cm_mat.on_host = 0
        cm_mat.on_device = 1
        cm_mat.is_trans = 0
        cm_mat.owns_data = 0  # x owns the device memory and will free it

        # x.gpudata is a long. We need a pointer to a float. cast.
        import ctypes
        cm_mat.data_device = ctypes.cast(x.gpudata, ctypes.POINTER(ctypes.c_float))

        px = cudamat.CUDAMatrix(cm_mat)
        px._base = x  # x won't be __del__'ed as long as px is around.
        px.mat_on_host = False  # let cudamat know that we don't have a
                                # numpy array attached.
        return px
def cudandarray_to_garray(x, copyif=False):
    """
    take a CudaNdarray and return a gnumpy.garray object.

    NOTE(review): this is a second definition of cudandarray_to_garray in
    this module; at import time it shadows the earlier one.

    :type x: CudaNdarray
    :param x: The array to transform to gnumpy.garray.
    :type copyif: bool
    :param copyif: If False, raise an error if x is not c contiguous.
                   If it is c contiguous, the returned garray shares x's
                   device memory. If True, a non-contiguous x is copied
                   first (so the result does not share memory); a
                   contiguous x still shares memory. This is needed
                   because GPUArray doesn't fully support strided memory.

    :return type: gnumpy.garray
    """
    if not isinstance(x, cuda.CudaNdarray):
        raise ValueError(
            "We can transfer only CudaNdarray to cudamat.CUDAMatrix")

    # C-contiguity check, innermost dimension first; extent-1 dims are
    # skipped because their stride is irrelevant.
    stride_expected = 1
    is_c_contiguous = True
    for dim in range(x.ndim - 1, -1, -1):
        if x.shape[dim] == 1:
            continue
        if x._strides[dim] != stride_expected:
            is_c_contiguous = False
            break
        stride_expected *= x.shape[dim]

    if not is_c_contiguous:
        if not copyif:
            raise ValueError(
                "We where asked to don't copy memory, but the memory is not c contiguous."
            )
        x = x.copy()

    # x is now guaranteed c contiguous. Build a raw cudamat struct with no
    # host data, shaped as a single column holding every element.
    struct = cudamat.cudamat()
    struct.size[0] = reduce(lambda a, b: a * b, x.shape, 1)
    struct.size[1] = 1
    struct.on_host = 0
    struct.on_device = 1
    struct.is_trans = 0
    struct.owns_data = 0  # x owns the device memory and will free it

    # x.gpudata is a long; cast it to a float pointer.
    import ctypes
    struct.data_device = ctypes.cast(x.gpudata, ctypes.POINTER(ctypes.c_float))

    px = cudamat.CUDAMatrix(struct)
    px._base = x  # keep x alive while the cudamat object is alive
    px.mat_on_host = False  # tell cudamat there is no numpy array attached

    # gnumpy normally parks cudamat objects in its _cmsReuseCache when a
    # garray is deleted, which would keep theano-provided memory alive
    # forever. Marking the garray as a view/alias prevents that, so the
    # cudamat object (and thus x) can actually be freed.
    return gnumpy.garray(px, x.shape, ___const_garray)
def cudandarray_to_cudamat(x, copyif=False):
    """
    take a CudaNdarray and return a cudamat.CUDAMatrix object.

    NOTE(review): this is a second definition of cudandarray_to_cudamat in
    this module; at import time it shadows the earlier one.

    :type x: CudaNdarray
    :param x: The 2-d array to transform to cudamat.CUDAMatrix.
    :type copyif: bool
    :param copyif: If False, raise an error if x is not c contiguous.
                   If it is c contiguous, the result shares x's device
                   memory. If True, a non-contiguous x is copied first
                   (so the result does not share memory); a contiguous x
                   still shares memory. This is needed because GPUArray
                   doesn't fully support strided memory.

    :return type: cudamat.CUDAMatrix
    :raises ValueError: if x is not a CudaNdarray, or is not c contiguous
        and copyif is False.
    :raises TypeError: if x is not 2-dimensional.
    """
    if not isinstance(x, cuda.CudaNdarray):
        raise ValueError("We can transfer only CudaNdarray to cudamat.CUDAMatrix")
    elif x.ndim != 2:
        # Fix: the %s placeholder was never interpolated, so the error
        # message used to print a literal "%s" instead of the dim count.
        raise TypeError("cudandarray_to_cudamat: input must be 2-d (has %s dims). That's "
                        "because cudamat arrays are always 2-dimensional" % x.ndim)
    else:
        # C-contiguity check: walk dims innermost-first; dims of extent 1
        # are skipped since their stride never matters.
        size = 1
        c_contiguous = True
        for i in range(x.ndim - 1, -1, -1):
            if x.shape[i] == 1:
                continue
            if x._strides[i] != size:
                c_contiguous = False
                break
            size *= x.shape[i]
        if not c_contiguous:
            if copyif:
                x = x.copy()
            else:
                raise ValueError("We where asked to don't copy memory, but the memory is not c contiguous.")

        # Now x is always c contiguous. Create a cudamat struct with no
        # host data, mirroring x's 2-d shape.
        cm_mat = cudamat.cudamat()
        cm_mat.size[0] = x.shape[0]
        cm_mat.size[1] = x.shape[1]
        cm_mat.on_host = 0
        cm_mat.on_device = 1
        cm_mat.is_trans = 0
        cm_mat.owns_data = 0  # x owns the device memory and will free it

        # x.gpudata is a long. We need a pointer to a float. cast.
        import ctypes
        cm_mat.data_device = ctypes.cast(x.gpudata, ctypes.POINTER(ctypes.c_float))

        px = cudamat.CUDAMatrix(cm_mat)
        px._base = x  # x won't be __del__'ed as long as px is around.
        # let cudamat know that we don't have a numpy array attached.
        px.mat_on_host = False
        return px