def exec_z2d(self, idata, odata):
    """Executes a double-precision complex-to-real,
    implicitly inverse, cuFFT transform plan.
    """
    err = self._lib.cufftExecZ2D(self.handle, idata, odata)
    if err:
        raise CU.error("cufftExecZ2D", err)
def exec_z2z(self, idata, odata, direction):
    """Executes a double-precision complex-to-complex cuFFT transform plan.
    """
    err = self._lib.cufftExecZ2Z(self.handle, idata, odata, direction)
    if err:
        raise CU.error("cufftExecZ2Z", err)
def exec_c2c(self, idata, odata, direction):
    """Executes a single-precision complex-to-complex cuFFT transform plan.
    """
    err = self._lib.cufftExecC2C(self.handle, idata, odata, direction)
    if err:
        raise CU.error("cufftExecC2C", err)
def __init__(self, context, rng_type=CURAND_RNG_PSEUDO_DEFAULT):
    """Constructor.

    Parameters:
        context: CUDA context handle or None to use the host generator.
        rng_type: type of the random generator.
    """
    self._context = context
    self._lib = None
    if context is not None:
        context._add_ref(self)
    initialize()
    handle = ffi.new("curandGenerator_t *")
    if context is not None:
        with context:
            err = lib.curandCreateGenerator(handle, int(rng_type))
    else:
        err = lib.curandCreateGeneratorHost(handle, int(rng_type))
    if err:
        self._handle = None
        raise CU.error(
            "curandCreateGenerator" if context is not None
            else "curandCreateGeneratorHost", err)
    self._lib = lib  # to hold the reference
    self._handle = int(handle[0])
    self._rng_type = int(rng_type)
    self._seed = 0
    self._offset = 0
    self._ordering = 0
    self._dimensions = 0
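# A minimal usage sketch (illustrative addition, not part of the original
# source): passing context=None selects the host generator, which can fill
# plain numpy arrays directly. The class name `CURAND` is an assumption
# about how the surrounding module exposes this constructor.
def _example_curand_host_uniform():
    import numpy
    rng = CURAND(None)  # host generator: no CUDA context required
    dst = numpy.zeros(1024, dtype=numpy.float32)
    rng.generate_uniform(dst)  # fills the whole array with values in (0, 1]
    return dst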
def exec_d2z(self, idata, odata):
    """Executes a double-precision real-to-complex,
    implicitly forward, cuFFT transform plan.
    """
    err = self._lib.cufftExecD2Z(self.handle, idata, odata)
    if err:
        raise CU.error("cufftExecD2Z", err)
def convolution_backward_data(
        self, alpha, filter_desc, filter_data, diff_desc, diff_data,
        conv_desc, beta, grad_desc, grad_data,
        algo=None, workspace=None, workspace_size=0):
    """Computes backpropagated error.

    Parameters:
        alpha: diff_data multiplier (numpy array with one element).
        beta: grad_data multiplier (numpy array with one element).
        filter_data: convolutional kernels.
        diff_data: error for backpropagation.
        grad_data: backpropagated error.
    """
    if self.version < 4000:
        err = self._lib.cudnnConvolutionBackwardData(
            self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
            diff_desc, diff_data, conv_desc,
            CU.extract_ptr(beta), grad_desc, grad_data)
    elif algo is None:
        err = self._lib.cudnnConvolutionBackwardData_v2(
            self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
            diff_desc, diff_data, conv_desc,
            CU.extract_ptr(beta), grad_desc, grad_data)
    else:
        err = self._lib.cudnnConvolutionBackwardData(
            self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
            diff_desc, diff_data, conv_desc,
            algo, workspace, workspace_size,
            CU.extract_ptr(beta), grad_desc, grad_data)
    if err:
        raise CU.error("cudnnConvolutionBackwardData", err)
def convolution_backward_filter(
        self, alpha, src_desc, src_data, diff_desc, diff_data,
        conv_desc, beta, grad_desc, grad_data,
        algo=None, workspace=None, workspace_size=0):
    """Computes gradient for the convolutional kernels.

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: grad_data multiplier (numpy array with one element).
        src_data: input from the forward pass.
        diff_data: error for backpropagation.
        grad_data: gradient for convolutional kernels.
    """
    if self.version < 4000:
        err = self._lib.cudnnConvolutionBackwardFilter(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            diff_desc, diff_data, conv_desc,
            CU.extract_ptr(beta), grad_desc, grad_data)
    elif algo is None:
        err = self._lib.cudnnConvolutionBackwardFilter_v2(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            diff_desc, diff_data, conv_desc,
            CU.extract_ptr(beta), grad_desc, grad_data)
    else:
        err = self._lib.cudnnConvolutionBackwardFilter(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            diff_desc, diff_data, conv_desc,
            algo, workspace, workspace_size,
            CU.extract_ptr(beta), grad_desc, grad_data)
    if err:
        raise CU.error("cudnnConvolutionBackwardFilter", err)
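# The alpha/beta convention in the two backward calls above follows cuDNN:
# the result is grad = alpha * op(...) + beta * grad, so beta=0 overwrites
# the gradient buffer while beta=1 accumulates into it. A sketch of the
# host-side scalars (illustrative; `cudnn`, the descriptors and the device
# buffers are assumed to be set up elsewhere):
def _example_backward_filter_overwrite(cudnn, src_desc, src_data,
                                       diff_desc, diff_data, conv_desc,
                                       grad_desc, grad_data):
    import numpy
    one = numpy.ones(1, dtype=numpy.float32)    # alpha: keep the new gradient
    zero = numpy.zeros(1, dtype=numpy.float32)  # beta: overwrite grad_data
    cudnn.convolution_backward_filter(
        one, src_desc, src_data, diff_desc, diff_data, conv_desc,
        zero, grad_desc, grad_data)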
def workarea(self, value):
    """Sets workarea for plan execution.
    """
    err = self._lib.cufftSetWorkArea(self.handle, value)
    if err:
        raise CU.error("cufftSetWorkArea", err)
    self._workarea = value
def ordering(self, value):
    """Sets generator ordering.
    """
    err = self._lib.curandSetGeneratorOrdering(self.handle, int(value))
    if err:
        raise CU.error("curandSetGeneratorOrdering", err)
    self._ordering = int(value)
def offset(self, value):
    """Sets generator offset as a 64-bit integer.
    """
    err = self._lib.curandSetGeneratorOffset(self.handle, int(value))
    if err:
        raise CU.error("curandSetGeneratorOffset", err)
    self._offset = int(value)
def dimensions(self, value):
    """Sets quasirandom generator dimensions.
    """
    err = self._lib.curandSetQuasiRandomGeneratorDimensions(
        self.handle, int(value))
    if err:
        raise CU.error("curandSetQuasiRandomGeneratorDimensions", err)
    self._dimensions = int(value)
def seed(self, value):
    """Sets generator seed as a 64-bit integer.
    """
    err = self._lib.curandSetPseudoRandomGeneratorSeed(
        self.handle, int(value))
    if err:
        raise CU.error("curandSetPseudoRandomGeneratorSeed", err)
    self._seed = int(value)
def version(self):
    """Returns cuFFT version.
    """
    version = ffi.new("int *")
    err = self._lib.cufftGetVersion(version)
    if err:
        raise CU.error("cufftGetVersion", err)
    return int(version[0])
def size(self):
    """Returns actual size of the work area required to support the plan.
    """
    sz = ffi.new("size_t[]", 4)
    err = self._lib.cufftGetSize(self.handle, sz)
    if err:
        raise CU.error("cufftGetSize", err)
    return int(sz[0])
def get_pooling_2d_forward_output_dim(pooling_desc, input_desc):
    """Returns tuple of n, c, h, w for an output.
    """
    n, c, h, w = (ffi.new("int *") for _ in range(4))
    err = lib.cudnnGetPooling2dForwardOutputDim(
        pooling_desc, input_desc, n, c, h, w)
    if err:
        raise CU.error("cudnnGetPooling2dForwardOutputDim", err)
    return int(n[0]), int(c[0]), int(h[0]), int(w[0])
def make_plan_many(self, xyz, batch, fft_type,
                   inembed=None, istride=1, idist=0,
                   onembed=None, ostride=1, odist=0):
    """Makes a 1-, 2- or 3-dimensional FFT plan.

    Parameters:
        xyz: tuple of dimensions.
        batch: number of FFTs to make.
        fft_type: type of FFT (CUFFT_R2C, CUFFT_C2R etc.).
        inembed: tuple with storage dimensions of the input data in memory
                 (can be None).
        istride: distance between two successive input elements in the
                 least significant (i.e., innermost) dimension.
        idist: distance between the first element of two consecutive
               signals in a batch of the input data.
        onembed: tuple with storage dimensions of the output data in memory
                 (can be None).
        ostride: distance between two successive output elements in the
                 least significant (i.e., innermost) dimension.
        odist: distance between the first element of two consecutive
               signals in a batch of the output data.

    Will assign self.execute based on fft_type.

    Returns:
        Required work size.
    """
    rank = len(xyz)
    n = ffi.new("int[]", rank)
    n[0:rank] = xyz
    if inembed is None:
        _inembed = ffi.NULL
    else:
        _inembed = ffi.new("int[]", rank)
        _inembed[0:rank] = inembed
    if onembed is None:
        _onembed = ffi.NULL
    else:
        _onembed = ffi.new("int[]", rank)
        _onembed[0:rank] = onembed
    sz = ffi.new("size_t[]", 4)
    err = self._lib.cufftMakePlanMany(
        self.handle, rank, n, _inembed, istride, idist,
        _onembed, ostride, odist, fft_type, batch, sz)
    if err:
        raise CU.error("cufftMakePlanMany", err)
    self.execute = {
        CUFFT_R2C: self.exec_r2c,
        CUFFT_C2R: self.exec_c2r,
        CUFFT_C2C: self.exec_c2c,
        CUFFT_D2Z: self.exec_d2z,
        CUFFT_Z2D: self.exec_z2d,
        CUFFT_Z2Z: self.exec_z2z
    }.get(fft_type, self._exec_unknown)
    return int(sz[0])
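# A usage sketch for the planner above (illustrative, not from the original
# source; the class name `CUFFT` is an assumption about the surrounding
# package): a batch of tightly packed 1D real-to-complex transforms, where
# each complex output signal holds length // 2 + 1 elements.
def _example_batched_r2c_plan(context, n_batches=8, length=1024):
    fft = CUFFT(context)
    sz = fft.make_plan_many((length,), n_batches, CUFFT_R2C,
                            idist=length, odist=length // 2 + 1)
    # After this call fft.execute is bound to exec_r2c, so
    # fft.execute(idata, odata) runs the transform.
    return fft, sz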
def get_convolution_2d_forward_output_dim(conv_desc, input_desc, filter_desc):
    """Returns tuple of n, c, h, w for an output.
    """
    n, c, h, w = (ffi.new("int *") for _ in range(4))
    err = lib.cudnnGetConvolution2dForwardOutputDim(
        conv_desc, input_desc, filter_desc, n, c, h, w)
    if err:
        raise CU.error("cudnnGetConvolution2dForwardOutputDim", err)
    return int(n[0]), int(c[0]), int(h[0]), int(w[0])
def set_pointer_mode(self, mode=CUBLAS_POINTER_MODE_DEVICE):
    """Sets the pointer mode used by the cuBLAS library.

    Parameters:
        mode: CUBLAS_POINTER_MODE_HOST or CUBLAS_POINTER_MODE_DEVICE
              (the default cuBLAS mode is CUBLAS_POINTER_MODE_HOST).
    """
    err = self._lib.cublasSetPointerMode_v2(self.handle, mode)
    if err:
        raise CU.error("cublasSetPointerMode_v2", err)
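# Pointer mode determines where cuBLAS reads alpha and beta from: with
# CUBLAS_POINTER_MODE_HOST they are host-resident scalars, with
# CUBLAS_POINTER_MODE_DEVICE they must live in device memory. A sketch
# (illustrative; `blas` is an instance of this wrapper class):
def _example_host_pointer_mode(blas):
    blas.set_pointer_mode(CUBLAS_POINTER_MODE_HOST)
    # alpha/beta passed to subsequent gemm calls may now be one-element
    # numpy arrays resident on the host.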
def get_convolution_forward_workspace_size(
        self, src_desc, filter_desc, conv_dec, dest_desc, algo):
    """Returns required size of the additional temporary buffer
    for the specified forward convolution algorithm.
    """
    size = ffi.new("size_t *")
    err = self._lib.cudnnGetConvolutionForwardWorkspaceSize(
        self.handle, src_desc, filter_desc, conv_dec, dest_desc,
        algo, size)
    if err:
        raise CU.error("cudnnGetConvolutionForwardWorkspaceSize", err)
    return int(size[0])
def dgemm(self, transA, transB,
          rowsCountA, columnCountB, commonSideLength,
          alpha, A, B, beta, C,
          strideA=0, strideB=0, strideC=0):
    """Double precision (double) GEneral Matrix Multiplication.

    Matrices are always in column order.

    C = alpha * dot(A, B) + beta * C
    C = alpha * dot(A^T, B) + beta * C
    C = alpha * dot(A, B^T) + beta * C
    C = alpha * dot(A^T, B^T) + beta * C

    alpha, A, B, beta, C can be numpy array, Memory object,
    cffi pointer or int.

    Parameters:
        transA: how matrix A is to be transposed
                (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
        transB: how matrix B is to be transposed
                (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
        rowsCountA: number of rows in matrix A.
        columnCountB: number of columns in matrix B.
        commonSideLength: length of the common side of the matrices.
        alpha: the factor of matrix A.
        A: matrix A.
        B: matrix B.
        beta: the factor of matrix C.
        C: Buffer object storing matrix C.
        strideA: leading dimension of matrix A:
                 >= commonSideLength if A is transposed,
                 else >= rowsCountA.
        strideB: leading dimension of matrix B:
                 >= columnCountB if B is transposed,
                 else >= commonSideLength.
        strideC: leading dimension of matrix C: >= rowsCountA.

    Returns:
        None.
    """
    if not strideA:
        strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
    if not strideB:
        strideB = (columnCountB if transB != CUBLAS_OP_N
                   else commonSideLength)
    if not strideC:
        strideC = rowsCountA
    err = self._lib.cublasDgemm_v2(
        self.handle, transA, transB,
        rowsCountA, columnCountB, commonSideLength,
        CU.extract_ptr(alpha), A, strideA, B, strideB,
        CU.extract_ptr(beta), C, strideC)
    if err:
        raise CU.error("cublasDgemm_v2", err)
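# A dgemm usage sketch (illustrative, not from the original source): since
# matrices are column-major, a C-contiguous numpy array is implicitly
# transposed, which the transA/transB flags can absorb. The device buffers
# a_dev, b_dev, c_dev are assumed to be allocated elsewhere.
def _example_dgemm(blas, a_dev, b_dev, c_dev, m, n, k):
    import numpy
    alpha = numpy.ones(1, dtype=numpy.float64)
    beta = numpy.zeros(1, dtype=numpy.float64)
    # C(m x n) = A(m x k) * B(k x n), all column-major, no transposition.
    blas.dgemm(CUBLAS_OP_N, CUBLAS_OP_N, m, n, k,
               alpha, a_dev, b_dev, beta, c_dev)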
def get_convolution_forward_algorithm(
        self, src_desc, filter_desc, conv_dec, dest_desc,
        preference=CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, memory_limit=0):
    """Returns forward algorithm based on parameters.
    """
    algo = ffi.new("cudnnConvolutionFwdAlgo_t *")
    err = self._lib.cudnnGetConvolutionForwardAlgorithm(
        self.handle, src_desc, filter_desc, conv_dec, dest_desc,
        preference, memory_limit, algo)
    if err:
        raise CU.error("cudnnGetConvolutionForwardAlgorithm", err)
    return int(algo[0])
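# The two queries above are typically chained: pick an algorithm first, then
# ask how much scratch space it needs. A sketch (illustrative; the
# descriptors are assumed to be configured elsewhere):
def _example_forward_algo_and_workspace(cudnn, src_desc, filter_desc,
                                        conv_desc, dest_desc):
    algo = cudnn.get_convolution_forward_algorithm(
        src_desc, filter_desc, conv_desc, dest_desc)
    size = cudnn.get_convolution_forward_workspace_size(
        src_desc, filter_desc, conv_desc, dest_desc, algo)
    # The caller then allocates `size` bytes of device memory to pass as
    # the workspace for the forward convolution.
    return algo, size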
def __init__(self, context):
    self._context = context
    self._lib = None
    context._add_ref(self)
    initialize()
    handle = ffi.new("cublasHandle_t *")
    with context:
        err = lib.cublasCreate_v2(handle)
    if err:
        self._handle = None
        raise CU.error("cublasCreate_v2", err)
    self._lib = lib  # to hold the reference
    self._handle = handle[0]
def set_2d(self, window_hw, padding_vh, stride_vh, mode=CUDNN_POOLING_MAX):
    """Initializes the pooling descriptor for 2D pooling.

    Parameters:
        window_hw: tuple of ints for pooling window (height, width).
        padding_vh: tuple for padding (vertical, horizontal).
        stride_vh: tuple for stride (vertical, horizontal).
        mode: pooling mode.
    """
    err = self._lib.cudnnSetPooling2dDescriptor(
        self.handle, mode, window_hw[0], window_hw[1],
        padding_vh[0], padding_vh[1], stride_vh[0], stride_vh[1])
    if err:
        raise CU.error("cudnnSetPooling2dDescriptor", err)
def set_4d(self, data_type, k, c, h, w):
    """Initializes the filter descriptor into a 4D filter.

    Parameters:
        data_type: CUDNN_DATA_FLOAT or CUDNN_DATA_DOUBLE.
        k: number of kernels.
        c: number of image channels.
        h: image height.
        w: image width.
    """
    err = self._lib.cudnnSetFilter4dDescriptor(
        self.handle, data_type, k, c, h, w)
    if err:
        raise CU.error("cudnnSetFilter4dDescriptor", err)
def transform_tensor(self, alpha, src_desc, src_data,
                     beta, dest_desc, dest_data):
    """Transforms data from one layout to another
    (e.g. interleaved to planar).

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: dest_data multiplier (numpy array with one element).
    """
    err = self._lib.cudnnTransformTensor(
        self.handle, CU.extract_ptr(alpha), src_desc, src_data,
        CU.extract_ptr(beta), dest_desc, dest_data)
    if err:
        raise CU.error("cudnnTransformTensor", err)
def __init__(self, context):
    self._context = context
    self._lib = None
    context._add_ref(self)
    initialize()
    self.version = int(lib.cudnnGetVersion())
    handle = ffi.new("cudnnHandle_t *")
    with context:
        err = lib.cudnnCreate(handle)
    if err:
        self._handle = None
        raise CU.error("cudnnCreate", err)
    self._lib = lib  # to hold the reference
    self._handle = int(handle[0])
def pooling_forward(self, pooling_desc, alpha, src_desc, src_data,
                    beta, dest_desc, dest_data):
    """Does pooling forward propagation.

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: dest_data multiplier (numpy array with one element).
    """
    err = self._lib.cudnnPoolingForward(
        self.handle, pooling_desc, CU.extract_ptr(alpha),
        src_desc, src_data, CU.extract_ptr(beta), dest_desc, dest_data)
    if err:
        raise CU.error("cudnnPoolingForward", err)
def generate64(self, dst, count=None):
    """Generates the specified number of 64-bit random values.

    Valid only for 64-bit generators.

    Parameters:
        dst: buffer to store the results, or a numpy array in case of
             the host generator.
        count: number of 64-bit values to put into dst, or None to fill
               dst completely when its size is available.
    """
    dst, count = self._extract_ptr_and_count(dst, count, 8)
    err = self._lib.curandGenerateLongLong(self.handle, dst, count)
    if err:
        raise CU.error("curandGenerateLongLong", err)
def generate_uniform_double(self, dst, count=None):
    """Generates the specified number of 64-bit uniformly distributed
    floats.

    Will generate values in the range (0, 1].

    Parameters:
        dst: buffer to store the results, or a numpy array in case of
             the host generator.
        count: number of 64-bit floats to put into dst, or None to fill
               dst completely when its size is available.
    """
    dst, count = self._extract_ptr_and_count(dst, count, 8)
    err = self._lib.curandGenerateUniformDouble(self.handle, dst, count)
    if err:
        raise CU.error("curandGenerateUniformDouble", err)
def generate_uniform(self, dst, count=None):
    """Generates the specified number of 32-bit uniformly distributed
    floats.

    Will generate values in the range (0, 1].

    Parameters:
        dst: buffer to store the results, or a numpy array in case of
             the host generator.
        count: number of 32-bit floats to put into dst, or None to fill
               dst completely when its size is available.
    """
    dst, count = self._extract_ptr_and_count(dst, count, 4)
    err = self._lib.curandGenerateUniform(self.handle, dst, count)
    if err:
        raise CU.error("curandGenerateUniform", err)
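# When dst is a device buffer rather than a numpy array, count can often be
# inferred from the buffer size, but passing it explicitly is unambiguous.
# A sketch (illustrative; `rng` is a device generator and `buf` a device
# allocation of at least 4 * n bytes):
def _example_uniform_device(rng, buf, n):
    rng.generate_uniform(buf, n)  # n 32-bit floats in (0, 1]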
def set_4d(self, fmt, data_type, n, c, h, w):
    """Initializes the tensor descriptor into a 4D tensor.

    Parameters:
        fmt: CUDNN_TENSOR_NCHW or CUDNN_TENSOR_NHWC.
        data_type: CUDNN_DATA_FLOAT or CUDNN_DATA_DOUBLE.
        n: number of images.
        c: number of image channels.
        h: image height.
        w: image width.
    """
    err = self._lib.cudnnSetTensor4dDescriptor(
        self.handle, fmt, data_type, n, c, h, w)
    if err:
        raise CU.error("cudnnSetTensor4dDescriptor", err)
def convolution_backward_bias(self, alpha, src_desc, src_data,
                              beta, dest_desc, dest_data):
    """Computes gradient for the bias.

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: dest_data multiplier (numpy array with one element).
        src_data: error for backpropagation.
        dest_data: gradient for the bias.
    """
    err = self._lib.cudnnConvolutionBackwardBias(
        self.handle, CU.extract_ptr(alpha), src_desc, src_data,
        CU.extract_ptr(beta), dest_desc, dest_data)
    if err:
        raise CU.error("cudnnConvolutionBackwardBias", err)
def __init__(self, context):
    self._context = context
    self._lib = None
    context._add_ref(self)
    initialize()
    handle = ffi.new("cufftHandle *")
    with context:
        err = lib.cufftCreate(handle)
    if err:
        self._handle = None
        raise CU.error("cufftCreate", err)
    self._lib = lib  # to hold the reference
    self._handle = int(handle[0])
    self._auto_allocation = True
    self._workarea = None
    self.execute = self._exec_unknown
def generate_normal_double(self, dst, count=None, mean=0.0, stddev=1.0):
    """Generates the specified number of 64-bit normally distributed
    floats.

    Parameters:
        dst: buffer to store the results, or a numpy array in case of
             the host generator.
        count: number of 64-bit floats to put into dst, or None to fill
               dst completely when its size is available.
        mean: mean of the normal distribution to generate.
        stddev: stddev of the normal distribution to generate.
    """
    dst, count = self._extract_ptr_and_count(dst, count, 8)
    err = self._lib.curandGenerateNormalDouble(
        self.handle, dst, count, float(mean), float(stddev))
    if err:
        raise CU.error("curandGenerateNormalDouble", err)
def generate_poisson(self, dst, count=None, lam=1.0):
    """Generates the specified number of 32-bit unsigned integer values
    with a Poisson distribution.

    Parameters:
        dst: buffer to store the results, or a numpy array in case of
             the host generator.
        count: number of 32-bit unsigned ints to put into dst, or None to
               fill dst completely when its size is available.
        lam: lambda value of the Poisson distribution.
    """
    dst, count = self._extract_ptr_and_count(dst, count, 4)
    err = self._lib.curandGeneratePoisson(
        self.handle, dst, count, float(lam))
    if err:
        raise CU.error("curandGeneratePoisson", err)
def auto_allocation(self, value):
    alloc = bool(value)
    err = self._lib.cufftSetAutoAllocation(self.handle, alloc)
    if err:
        raise CU.error("cufftSetAutoAllocation", err)
    self._auto_allocation = alloc
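# The plan attributes defined in this section (auto_allocation, size,
# workarea) combine into a manual work-area flow: turn auto-allocation off
# before the plan is made, then attach a caller-owned device buffer of the
# required size. A sketch (illustrative; `alloc_device_bytes` is a
# hypothetical allocator, and the setters are assumed to be exposed as
# properties on the plan object):
def _example_manual_workarea(fft, xyz, batch, fft_type, alloc_device_bytes):
    fft.auto_allocation = False          # must precede plan creation
    required = fft.make_plan_many(xyz, batch, fft_type)
    buf = alloc_device_bytes(required)   # caller-owned scratch buffer
    fft.workarea = buf
    return buf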
def sgemm_ex(self, transA, transB,
             rowsCountA, columnCountB, commonSideLength,
             alpha, A, B, beta, C,
             strideA=0, strideB=0, strideC=0,
             dtypeA=CUBLAS_DATA_HALF, dtypeB=CUBLAS_DATA_HALF,
             dtypeC=CUBLAS_DATA_HALF):
    """Single precision (float) GEneral Matrix Multiplication
    with support of different data types for each matrix.

    Matrices are always in column order.

    C = alpha * dot(A, B) + beta * C
    C = alpha * dot(A^T, B) + beta * C
    C = alpha * dot(A, B^T) + beta * C
    C = alpha * dot(A^T, B^T) + beta * C

    alpha, A, B, beta, C can be numpy array, Memory object,
    cffi pointer or int.

    Parameters:
        transA: how matrix A is to be transposed
                (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
        transB: how matrix B is to be transposed
                (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
        rowsCountA: number of rows in matrix A.
        columnCountB: number of columns in matrix B.
        commonSideLength: length of the common side of the matrices.
        alpha: the factor of matrix A.
        A: matrix A.
        B: matrix B.
        beta: the factor of matrix C.
        C: Buffer object storing matrix C.
        strideA: leading dimension of matrix A:
                 >= commonSideLength if A is transposed,
                 else >= rowsCountA.
        strideB: leading dimension of matrix B:
                 >= columnCountB if B is transposed,
                 else >= commonSideLength.
        strideC: leading dimension of matrix C: >= rowsCountA.
        dtypeA: data type of matrix A (CUBLAS_DATA_FLOAT,
                CUBLAS_DATA_DOUBLE, CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).
        dtypeB: data type of matrix B (CUBLAS_DATA_FLOAT,
                CUBLAS_DATA_DOUBLE, CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).
        dtypeC: data type of matrix C (CUBLAS_DATA_FLOAT,
                CUBLAS_DATA_DOUBLE, CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).

    Returns:
        None.
    """
    if not strideA:
        strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
    if not strideB:
        strideB = (columnCountB if transB != CUBLAS_OP_N
                   else commonSideLength)
    if not strideC:
        strideC = rowsCountA
    err = self._lib.cublasSgemmEx(
        self.handle, transA, transB,
        rowsCountA, columnCountB, commonSideLength,
        CU.extract_ptr(alpha), A, dtypeA, strideA, B, dtypeB, strideB,
        CU.extract_ptr(beta), C, dtypeC, strideC)
    if err:
        raise CU.error("cublasSgemmEx", err)
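# sgemm_ex computes in single precision while letting each matrix be stored
# in a narrower type; with the CUBLAS_DATA_HALF defaults above it reads
# half-precision A, B and C while alpha and beta stay float32. A sketch
# (illustrative; the device buffers are assumed to hold half-precision
# data):
def _example_sgemm_ex_half(blas, a_dev, b_dev, c_dev, m, n, k):
    import numpy
    alpha = numpy.ones(1, dtype=numpy.float32)   # scalars stay float32
    beta = numpy.zeros(1, dtype=numpy.float32)
    blas.sgemm_ex(CUBLAS_OP_N, CUBLAS_OP_N, m, n, k,
                  alpha, a_dev, b_dev, beta, c_dev)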