Example #1
    def dgemm(self, transA, transB,
              rowsCountA, columnCountB, commonSideLength,
              alpha, A, B, beta, C,
              strideA=0, strideB=0, strideC=0):
        """Double precision (double) GEneral Matrix Multiplication.

        Matrices are always in column-major order.

        C = alpha * dot(A, B) + beta * C
        C = alpha * dot(A^T, B) + beta * C
        C = alpha * dot(A, B^T) + beta * C
        C = alpha * dot(A^T, B^T) + beta * C

        alpha, A, B, beta, C can be numpy array, Memory object,
        cffi pointer or int.

        Parameters:
            transA: how matrix A is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            transB: how matrix B is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            rowsCountA: number of rows in matrix A.
            columnCountB: number of columns in matrix B.
            commonSideLength: length of the common side of the matrices.
            alpha: the factor of matrix A.
            A: matrix A.
            B: matrix B.
            beta: the factor of matrix C.
            C: Buffer object storing matrix C.
            strideA: leading dimension of matrix A:
                     transA != CUBLAS_OP_N: >= commonSideLength,
                     else: >= rowsCountA.
            strideB: leading dimension of matrix B:
                     transB != CUBLAS_OP_N: >= columnCountB,
                     else: >= commonSideLength.
            strideC: leading dimension of matrix C: >= rowsCountA.

        Returns:
            None.
        """
        if not strideA:
            strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
        if not strideB:
            strideB = (columnCountB if transB != CUBLAS_OP_N
                       else commonSideLength)
        if not strideC:
            strideC = rowsCountA
        err = self._lib.cublasDgemm_v2(
            self.handle, transA, transB, rowsCountA, columnCountB,
            commonSideLength, CU.extract_ptr(alpha), A, strideA,
            B, strideB, CU.extract_ptr(beta), C, strideC)
        if err:
            raise CU.error("cublasDgemm_v2", err)
Example #2
    def pooling_forward(self, pooling_desc, alpha, src_desc, src_data,
                        beta, dest_desc, dest_data):
        """Does pooling forward propagation.

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: dest_data multiplier (numpy array with one element).
        """
        err = self._lib.cudnnPoolingForward(
            self.handle, pooling_desc, CU.extract_ptr(alpha),
            src_desc, src_data,
            CU.extract_ptr(beta), dest_desc, dest_data)
        if err:
            raise CU.error("cudnnPoolingForward", err)
Example #3
    def transform_tensor(self, alpha, src_desc, src_data,
                         beta, dest_desc, dest_data):
        """Transforms data from one layout to another
        (for example, from interleaved to split layout).

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: dest_data multiplier (numpy array with one element).
        """
        err = self._lib.cudnnTransformTensor(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            CU.extract_ptr(beta), dest_desc, dest_data)
        if err:
            raise CU.error("cudnnTransformTensor", err)
Example #4
    def convolution_backward_bias(self, alpha, src_desc, src_data,
                                  beta, dest_desc, dest_data):
        """Computes gradient for the bias.

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: dest_data multiplier (numpy array with one element).
            src_data: error for backpropagation.
            dest_data: gradient for the bias.
        """
        err = self._lib.cudnnConvolutionBackwardBias(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            CU.extract_ptr(beta), dest_desc, dest_data)
        if err:
            raise CU.error("cudnnConvolutionBackwardBias", err)
Example #5
 def offset(self, value):
     """Sets generator offset as an 64-bit integer.
     """
     err = self._lib.curandSetGeneratorOffset(self.handle, int(value))
     if err:
         raise CU.error("curandSetGeneratorOffset", err)
     self._offset = int(value)
Example #6
    def __init__(self, context, rng_type=CURAND_RNG_PSEUDO_DEFAULT):
        """Constructor.

        Parameters:
            context: CUDA context handle or None to use the host generator.
            rng_type: type of the random generator.
        """
        self._context = context
        self._lib = None
        if context is not None:
            context._add_ref(self)
        initialize()
        handle = ffi.new("curandGenerator_t *")
        if context is not None:
            with context:
                err = lib.curandCreateGenerator(handle, int(rng_type))
        else:
            err = lib.curandCreateGeneratorHost(handle, int(rng_type))
        if err:
            self._handle = None
            raise CU.error(
                "curandCreateGenerator"
                if context is not None else "curandCreateGeneratorHost", err)
        self._lib = lib  # to hold the reference
        self._handle = int(handle[0])
        self._rng_type = int(rng_type)
        self._seed = 0
        self._offset = 0
        self._ordering = 0
        self._dimensions = 0
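A minimal sketch of both constructor variants, assuming the class is exported under a name like CURAND and that seed (shown further down this page) is exposed as a property setter, as its single-argument signature suggests; `ctx` stands for an already created CUDA context:

    rng_host = CURAND(None)   # host-side generator (curandCreateGeneratorHost)
    rng_dev = CURAND(ctx)     # device-side generator bound to an existing context
    rng_dev.seed = 123        # curandSetPseudoRandomGeneratorSeed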
Example #7
 def ordering(self, value):
     """Sets generator ordering.
     """
     err = self._lib.curandSetGeneratorOrdering(self.handle, int(value))
     if err:
         raise CU.error("curandSetGeneratorOrdering", err)
     self._ordering = int(value)
Example #8
 def exec_c2c(self, idata, odata, direction):
     """Executes a single-precision complex-to-complex
     cuFFT transform plan.
     """
     err = self._lib.cufftExecC2C(self.handle, idata, odata, direction)
     if err:
         raise CU.error("cufftExecC2C", err)
Example #9
 def exec_z2z(self, idata, odata, direction):
     """Executes a double-precision complex-to-complex
     cuFFT transform plan.
     """
     err = self._lib.cufftExecZ2Z(self.handle, idata, odata, direction)
     if err:
         raise CU.error("cufftExecZ2Z", err)
Example #10
 def exec_z2d(self, idata, odata):
     """Executes a double-precision complex-to-real,
     implicitly inverse, cuFFT transform plan.
     """
     err = self._lib.cufftExecZ2D(self.handle, idata, odata)
     if err:
         raise CU.error("cufftExecZ2D", err)
Example #11
 def exec_d2z(self, idata, odata):
     """Executes a double-precision real-to-complex,
     implicitly forward, cuFFT transform plan.
     """
     err = self._lib.cufftExecD2Z(self.handle, idata, odata)
     if err:
         raise CU.error("cufftExecD2Z", err)
Example #12
 def workarea(self, value):
     """Sets workarea for plan execution.
     """
     err = self._lib.cufftSetWorkArea(self.handle, value)
     if err:
         raise CU.error("cufftSetWorkArea", err)
     self._workarea = value
Example #13
    def __init__(self, context, rng_type=CURAND_RNG_PSEUDO_DEFAULT):
        """Constructor.

        Parameters:
            context: CUDA context handle or None to use the host generator.
            rng_type: type of the random generator.
        """
        self._context = context
        self._lib = None
        if context is not None:
            context._add_ref(self)
        initialize()
        handle = ffi.new("curandGenerator_t *")
        if context is not None:
            with context:
                err = lib.curandCreateGenerator(handle, int(rng_type))
        else:
            err = lib.curandCreateGeneratorHost(handle, int(rng_type))
        if err:
            self._handle = None
            raise CU.error("curandCreateGenerator" if context is not None
                           else "curandCreateGeneratorHost", err)
        self._lib = lib  # to hold the reference
        self._handle = int(handle[0])
        self._rng_type = int(rng_type)
        self._seed = 0
        self._offset = 0
        self._ordering = 0
        self._dimensions = 0
Example #14
 def seed(self, value):
     """Sets generator seed as an 64-bit integer.
     """
     err = self._lib.curandSetPseudoRandomGeneratorSeed(
         self.handle, int(value))
     if err:
         raise CU.error("curandSetPseudoRandomGeneratorSeed", err)
     self._seed = int(value)
Example #15
 def offset(self, value):
     """Sets generator offset as an 64-bit integer.
     """
     err = self._lib.curandSetGeneratorOffset(
         self.handle, int(value))
     if err:
         raise CU.error("curandSetGeneratorOffset", err)
     self._offset = int(value)
Example #16
 def dimensions(self, value):
     """Sets quasirandom generator dimensions.
     """
     err = self._lib.curandSetQuasiRandomGeneratorDimensions(
         self.handle, int(value))
     if err:
         raise CU.error("curandSetQuasiRandomGeneratorDimensions", err)
     self._dimensions = int(value)
Example #17
 def ordering(self, value):
     """Sets generator ordering.
     """
     err = self._lib.curandSetGeneratorOrdering(
         self.handle, int(value))
     if err:
         raise CU.error("curandSetGeneratorOrdering", err)
     self._ordering = int(value)
Example #18
 def size(self):
     """Returns actual size of the work area required to support the plan.
     """
     sz = ffi.new("size_t[]", 4)
     err = self._lib.cufftGetSize(self.handle, sz)
     if err:
         raise CU.error("cufftGetSize", err)
     return int(sz[0])
Example #19
 def version(self):
     """Returns cuFFT version.
     """
     version = ffi.new("int *")
     err = self._lib.cufftGetVersion(version)
     if err:
         raise CU.error("cufftGetVersion", err)
     return int(version[0])
Example #20
    def convolution_backward_filter(
            self, alpha, src_desc, src_data, diff_desc, diff_data, conv_desc,
            beta, grad_desc, grad_data,
            algo=None, workspace=None, workspace_size=0):
        """Computes gradient for the convolutional kernels.

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: grad_data multiplier (numpy array with one element).
            src_data: input from the forward pass.
            diff_data: error for backpropagation.
            grad_data: gradient for convolutional kernels.
        """
        if self.version < 4000:
            err = self._lib.cudnnConvolutionBackwardFilter(
                self.handle, CU.extract_ptr(alpha), src_desc, src_data,
                diff_desc, diff_data, conv_desc,
                CU.extract_ptr(beta), grad_desc, grad_data)
        elif algo is None:
            err = self._lib.cudnnConvolutionBackwardFilter_v2(
                self.handle, CU.extract_ptr(alpha), src_desc, src_data,
                diff_desc, diff_data, conv_desc,
                CU.extract_ptr(beta), grad_desc, grad_data)
        else:
            err = self._lib.cudnnConvolutionBackwardFilter(
                self.handle, CU.extract_ptr(alpha), src_desc, src_data,
                diff_desc, diff_data, conv_desc,
                algo, workspace, workspace_size,
                CU.extract_ptr(beta), grad_desc, grad_data)
        if err:
            raise CU.error("cudnnConvolutionBackwardFilter", err)
Example #21
    def convolution_backward_data(
            self, alpha, filter_desc, filter_data, diff_desc, diff_data,
            conv_desc, beta, grad_desc, grad_data,
            algo=None, workspace=None, workspace_size=0):
        """Computes backpropagated error.

        Parameters:
            alpha: diff_data multiplier (numpy array with one element).
            beta: grad_data multiplier (numpy array with one element).
            filter_data: convolutional kernels.
            diff_data: error for backpropagation.
            grad_data: backpropagated error.
        """
        if self.version < 4000:
            err = self._lib.cudnnConvolutionBackwardData(
                self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
                diff_desc, diff_data, conv_desc,
                CU.extract_ptr(beta), grad_desc, grad_data)
        elif algo is None:
            err = self._lib.cudnnConvolutionBackwardData_v2(
                self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
                diff_desc, diff_data, conv_desc,
                CU.extract_ptr(beta), grad_desc, grad_data)
        else:
            err = self._lib.cudnnConvolutionBackwardData(
                self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
                diff_desc, diff_data, conv_desc,
                algo, workspace, workspace_size,
                CU.extract_ptr(beta), grad_desc, grad_data)
        if err:
            raise CU.error("cudnnConvolutionBackwardData", err)
Example #22
 def get_pooling_2d_forward_output_dim(pooling_desc, input_desc):
     """Returns tuple of n, c, h, w for an output.
     """
     n, c, h, w = (ffi.new("int *") for _ in range(4))
     err = lib.cudnnGetPooling2dForwardOutputDim(
         pooling_desc, input_desc, n, c, h, w)
     if err:
         raise CU.error("cudnnGetPooling2dForwardOutputDim", err)
     return int(n[0]), int(c[0]), int(h[0]), int(w[0])
Example #23
    def make_plan_many(self, xyz, batch, fft_type,
                       inembed=None, istride=1, idist=0,
                       onembed=None, ostride=1, odist=0):
        """Makes 1, 2 or 3 dimensional FFT plan.

        Parameters:
            xyz: tuple of dimensions.
            batch: number of FFTs to make.
            fft_type: type of FFT (CUFFT_R2C, CUFFT_C2R etc.).
            inembed: tuple with storage dimensions of the input data in memory
                     (can be None).
            istride: distance between two successive input elements
                     in the least significant (i.e., innermost) dimension.
            idist: distance between the first element of two consecutive
                   signals in a batch of the input data.
            onembed: tuple with storage dimensions of the output data in memory
                     (can be None).
            ostride: distance between two successive output elements
                     in the least significant (i.e., innermost) dimension.
            odist: distance between the first element of two consecutive
                   signals in a batch of the output data.

        Will assign self.execute based on fft_type.

        Returns:
            Required work size.
        """
        rank = len(xyz)
        n = ffi.new("int[]", rank)
        n[0:rank] = xyz
        if inembed is None:
            _inembed = ffi.NULL
        else:
            _inembed = ffi.new("int[]", rank)
            _inembed[0:rank] = inembed
        if onembed is None:
            _onembed = ffi.NULL
        else:
            _onembed = ffi.new("int[]", rank)
            _onembed[0:rank] = onembed
        sz = ffi.new("size_t[]", 4)
        err = self._lib.cufftMakePlanMany(self.handle, rank, n,
                                          _inembed, istride, idist,
                                          _onembed, ostride, odist,
                                          fft_type, batch, sz)
        if err:
            raise CU.error("cufftMakePlanMany", err)
        self.execute = {
            CUFFT_R2C: self.exec_r2c,
            CUFFT_C2R: self.exec_c2r,
            CUFFT_C2C: self.exec_c2c,
            CUFFT_D2Z: self.exec_d2z,
            CUFFT_Z2D: self.exec_z2d,
            CUFFT_Z2Z: self.exec_z2z
        }.get(fft_type, self._exec_unknown)
        return int(sz[0])
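A sketch of planning and running a batched single-precision transform with this method, assuming a plan object `fft` of this class, device buffers `in_buf`/`out_buf` (illustrative names) and that CUFFT_C2C and CUFFT_FORWARD are exported by the same module (the latter is an assumption):

    # 256-point complex-to-complex FFT, batch of 8, tight default layout.
    work_size = fft.make_plan_many((256,), 8, CUFFT_C2C)
    # make_plan_many assigned self.execute = self.exec_c2c for CUFFT_C2C.
    fft.execute(in_buf, out_buf, CUFFT_FORWARD)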
Example #24
    def set_pointer_mode(self, mode=CUBLAS_POINTER_MODE_DEVICE):
        """Sets the pointer mode used by the cuBLAS library.

        Parameters:
            mode: CUBLAS_POINTER_MODE_HOST or CUBLAS_POINTER_MODE_DEVICE
                  (the default cuBLAS mode is CUBLAS_POINTER_MODE_HOST).
        """
        err = self._lib.cublasSetPointerMode_v2(self.handle, mode)
        if err:
            raise CU.error("cublasSetPointerMode_v2", err)
Example #25
    def convolution_forward(
            self, alpha, src_desc, src_data, filter_desc, filter_data,
            conv_desc, algo, workspace, workspace_size,
            beta, dest_desc, dest_data):
        """Does convolution forward propagation.

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: dest_data multiplier (numpy array with one element).
        """
        size = ffi.new("size_t *")
        err = self._lib.cudnnConvolutionForward(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            filter_desc, filter_data, conv_desc,
            algo, workspace, workspace_size,
            CU.extract_ptr(beta), dest_desc, dest_data)
        if err:
            raise CU.error("cudnnConvolutionForward", err)
Example #26
 def get_convolution_2d_forward_output_dim(conv_desc, input_desc,
                                           filter_desc):
     """Returns tuple of n, c, h, w for an output.
     """
     n, c, h, w = (ffi.new("int *") for _ in range(4))
     err = lib.cudnnGetConvolution2dForwardOutputDim(
         conv_desc, input_desc, filter_desc, n, c, h, w)
     if err:
         raise CU.error("cudnnGetConvolution2dForwardOutputDim", err)
     return int(n[0]), int(c[0]), int(h[0]), int(w[0])
Example #27
    def pooling_backward(self, pooling_desc, alpha, output_desc, output_data,
                         diff_desc, diff_data, input_desc, input_data,
                         beta, grad_desc, grad_data):
        """Does pooling backward propagation.

        Parameters:
            alpha: diff_data multiplier (numpy array with one element).
            beta: grad_data multiplier (numpy array with one element).
            output_data: output of the forward propagation.
            diff_data: error for backpropagation.
            input_data: input of the forward propagation.
            grad_data: backpropagated error.
        """
        err = self._lib.cudnnPoolingBackward(
            self.handle, pooling_desc, CU.extract_ptr(alpha),
            output_desc, output_data,
            diff_desc, diff_data, input_desc, input_data,
            CU.extract_ptr(beta), grad_desc, grad_data)
        if err:
            raise CU.error("cudnnPoolingBackward", err)
Example #28
 def get_convolution_forward_workspace_size(
         self, src_desc, filter_desc, conv_dec, dest_desc, algo):
     """Returns required size of the additional temporary buffer
     for the specified forward convolution algorithm.
     """
     size = ffi.new("size_t *")
     err = self._lib.cudnnGetConvolutionForwardWorkspaceSize(
         self.handle, src_desc, filter_desc, conv_dec, dest_desc,
         algo, size)
     if err:
         raise CU.error("cudnnGetConvolutionForwardWorkspaceSize", err)
     return int(size[0])
Example #29
 def get_convolution_forward_algorithm(
         self, src_desc, filter_desc, conv_dec, dest_desc,
         preference=CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, memory_limit=0):
     """Returns forward algorithm based on parameters.
     """
     algo = ffi.new("cudnnConvolutionFwdAlgo_t *")
     err = self._lib.cudnnGetConvolutionForwardAlgorithm(
         self.handle, src_desc, filter_desc, conv_dec, dest_desc,
         preference, memory_limit, algo)
     if err:
         raise CU.error("cudnnGetConvolutionForwardAlgorithm", err)
     return int(algo[0])
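The three forward-convolution helpers above are normally chained: pick an algorithm, query its workspace requirement, then run the forward pass. A sketch assuming a cuDNN wrapper instance `cudnn`, configured descriptors and device buffers (illustrative names) and a hypothetical alloc_device helper for the workspace:

    import numpy

    algo = cudnn.get_convolution_forward_algorithm(src_desc, filter_desc,
                                                   conv_desc, dest_desc)
    size = cudnn.get_convolution_forward_workspace_size(src_desc, filter_desc,
                                                        conv_desc, dest_desc, algo)
    workspace = alloc_device(size)   # alloc_device is an illustrative helper
    alpha = numpy.ones(1, dtype=numpy.float32)
    beta = numpy.zeros(1, dtype=numpy.float32)
    cudnn.convolution_forward(alpha, src_desc, src_data, filter_desc, filter_data,
                              conv_desc, algo, workspace, size,
                              beta, dest_desc, dest_data)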
Example #30
 def __init__(self, context):
     self._context = context
     self._lib = None
     context._add_ref(self)
     initialize()
     handle = ffi.new("cublasHandle_t *")
     with context:
         err = lib.cublasCreate_v2(handle)
     if err:
         self._handle = None
         raise CU.error("cublasCreate_v2", err)
     self._lib = lib  # to hold the reference
     self._handle = handle[0]
Example #31
    def set_4d(self, data_type, k, c, h, w):
        """Initializes tensor descriptor into a 4D tensor.

        Parameters:
            data_type: CUDNN_DATA_FLOAT or CUDNN_DATA_DOUBLE.
            k: number of kernels.
            c: number of image channels.
            h: image height.
            w: image width.
        """
        err = self._lib.cudnnSetFilter4dDescriptor(
            self.handle, data_type, k, c, h, w)
        if err:
            raise CU.error("cudnnSetFilter4dDescriptor", err)
Example #32
 def __init__(self, context):
     self._context = context
     self._lib = None
     context._add_ref(self)
     initialize()
     self.version = int(lib.cudnnGetVersion())
     handle = ffi.new("cudnnHandle_t *")
     with context:
         err = lib.cudnnCreate(handle)
     if err:
         self._handle = None
         raise CU.error("cudnnCreate", err)
     self._lib = lib  # to hold the reference
     self._handle = int(handle[0])
Example #33
    def set_2d(self, window_hw, padding_vh, stride_vh, mode=CUDNN_POOLING_MAX):
        """Initializes tensor descriptor into a 4D tensor.

        Parameters:
            window_hw: tuple of ints for pooling window (height, width).
            padding_vh: tuple for padding (vertical, horizontal).
            stride_vh: tuple for stride (vertical, horizontal).
            mode: pooling mode.
        """
        err = self._lib.cudnnSetPooling2dDescriptor(
            self.handle, mode, window_hw[0], window_hw[1],
            padding_vh[0], padding_vh[1], stride_vh[0], stride_vh[1])
        if err:
            raise CU.error("cudnnSetPooling2dDescriptor", err)
Example #34
    def _extract_ptr_and_count(self, arr, count, itemsize):
        """Returns tuple of address of an arr and extracted item count
        casted to int.

        It will clamp requested count to an array size if possible.
        """
        if self.context is None:
            arr, size = CU.extract_ptr_and_size(arr, None)
        elif count is None:
            size = arr.size
        else:
            size = getattr(arr, "size", count * itemsize)
        size = size if count is None else min(count * itemsize, size)
        return int(arr), int(size) // itemsize
Example #35
    def generate64(self, dst, count=None):
        """Generates specified number of 64-bit random values.

        Valid only for 64-bit generators.

        Parameters:
            dst: buffer to store the results or
                 numpy array in case of host generator.
            count: number of 64-bit values to put to dst or
                   None to fill the whole dst when its size is available.
        """
        dst, count = self._extract_ptr_and_count(dst, count, 8)
        err = self._lib.curandGenerateLongLong(self.handle, dst, count)
        if err:
            raise CU.error("curandGenerateLongLong", err)
Example #36
    def generate_uniform_double(self, dst, count=None):
        """Generates specified number of 64-bit uniformly distributed floats.

        Will generate values in range (0, 1].

        Parameters:
            dst: buffer to store the results or
                 numpy array in case of host generator.
            count: number of 64-bit floats to put to dst or
                   None to fill the whole dst when its size is available.
        """
        dst, count = self._extract_ptr_and_count(dst, count, 8)
        err = self._lib.curandGenerateUniformDouble(self.handle, dst, count)
        if err:
            raise CU.error("curandGenerateUniformDouble", err)
Example #37
 def __init__(self, context):
     self._context = context
     self._lib = None
     context._add_ref(self)
     initialize()
     handle = ffi.new("cufftHandle *")
     with context:
         err = lib.cufftCreate(handle)
     if err:
         self._handle = None
         raise CU.error("cufftCreate", err)
     self._lib = lib  # to hold the reference
     self._handle = int(handle[0])
     self._auto_allocation = True
     self._workarea = None
     self.execute = self._exec_unknown
Example #38
    def generate_normal_double(self, dst, count=None, mean=0.0, stddev=1.0):
        """Generates specified number of 64-bit normally distributed floats.

        Parameters:
            dst: buffer to store the results or
                 numpy array in case of host generator.
            count: number of 64-bit floats to put to dst or
                   None to fill the whole dst when its size is available.
            mean: mean of normal distribution to generate.
            stddev: stddev of normal distribution to generate.
        """
        dst, count = self._extract_ptr_and_count(dst, count, 8)
        err = self._lib.curandGenerateNormalDouble(self.handle, dst, count,
                                                   float(mean), float(stddev))
        if err:
            raise CU.error("curandGenerateNormalDouble", err)
Example #39
    def generate_poisson(self, dst, count=None, lam=1.0):
        """Generates specified number of 32-bit unsigned int point values
        with Poisson distribution.

        Parameters:
            dst: buffer to store the results or
                 numpy array in case of host generator.
            count: number of 32-bit unsigned ints to put to dst or
                   None to fill the whole dst when its size is available.
            lam: lambda value of Poisson distribution.
        """
        dst, count = self._extract_ptr_and_count(dst, count, 4)
        err = self._lib.curandGeneratePoisson(self.handle, dst, count,
                                              float(lam))
        if err:
            raise CU.error("curandGeneratePoisson", err)
Example #40
 def auto_allocation(self, value):
     """Enables or disables automatic work area allocation for the plan.
     """
     alloc = bool(value)
     err = self._lib.cufftSetAutoAllocation(self.handle, alloc)
     if err:
         raise CU.error("cufftSetAutoAllocation", err)
     self._auto_allocation = alloc
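Together with the size and workarea methods shown earlier, this setter supports manual work-area management: turn automatic allocation off before planning, then attach your own buffer. A sketch assuming an `fft` plan object, that auto_allocation and workarea are property setters (their signatures suggest this), and a hypothetical alloc_device helper:

    fft.auto_allocation = False                # cufftSetAutoAllocation(False)
    work_size = fft.make_plan_many((256, 256), 1, CUFFT_C2C)
    fft.workarea = alloc_device(work_size)     # alloc_device is illustrative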
Example #41
    def dgemm(self,
              transA,
              transB,
              rowsCountA,
              columnCountB,
              commonSideLength,
              alpha,
              A,
              B,
              beta,
              C,
              strideA=0,
              strideB=0,
              strideC=0):
        """Double precision (double) GEneral Matrix Multiplication.

        Matrices are always in column-major order.

        C = alpha * dot(A, B) + beta * C
        C = alpha * dot(A^T, B) + beta * C
        C = alpha * dot(A, B^T) + beta * C
        C = alpha * dot(A^T, B^T) + beta * C

        alpha, A, B, beta, C can be numpy array, Memory object,
        cffi pointer or int.

        Parameters:
            transA: how matrix A is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            transB: how matrix B is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            rowsCountA: number of rows in matrix A.
            columnCountB: number of columns in matrix B.
            commonSideLength: length of the common side of the matrices.
            alpha: the factor of matrix A.
            A: matrix A.
            B: matrix B.
            beta: the factor of matrix C.
            C: Buffer object storing matrix C.
            strideA: leading dimension of matrix A:
                     transA != CUBLAS_OP_N: >= commonSideLength,
                     else: >= rowsCountA.
            strideB: leading dimension of matrix B:
                     transB != CUBLAS_OP_N: >= columnCountB,
                     else: >= commonSideLength.
            strideC: leading dimension of matrix C: >= rowsCountA.

        Returns:
            None.
        """
        if not strideA:
            strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
        if not strideB:
            strideB = (columnCountB
                       if transB != CUBLAS_OP_N else commonSideLength)
        if not strideC:
            strideC = rowsCountA
        err = self._lib.cublasDgemm_v2(self.handle, transA, transB, rowsCountA,
                                       columnCountB, commonSideLength,
                                       CU.extract_ptr(alpha),
                                       A, strideA, B, strideB,
                                       CU.extract_ptr(beta), C, strideC)
        if err:
            raise CU.error("cublasDgemm_v2", err)
Example #42
    def make_plan_many(self,
                       xyz,
                       batch,
                       fft_type,
                       inembed=None,
                       istride=1,
                       idist=0,
                       onembed=None,
                       ostride=1,
                       odist=0):
        """Makes 1, 2 or 3 dimensional FFT plan.

        Parameters:
            xyz: tuple of dimensions.
            batch: number of FFTs to make.
            fft_type: type of FFT (CUFFT_R2C, CUFFT_C2R etc.).
            inembed: tuple with storage dimensions of the input data in memory
                     (can be None).
            istride: distance between two successive input elements
                     in the least significant (i.e., innermost) dimension.
            idist: distance between the first element of two consecutive
                   signals in a batch of the input data.
            onembed: tuple with storage dimensions of the output data in memory
                     (can be None).
            ostride: distance between two successive output elements
                     in the least significant (i.e., innermost) dimension.
            odist: distance between the first element of two consecutive
                   signals in a batch of the output data.

        Will assign self.execute based on fft_type.

        Returns:
            Required work size.
        """
        rank = len(xyz)
        n = ffi.new("int[]", rank)
        n[0:rank] = xyz
        if inembed is None:
            _inembed = ffi.NULL
        else:
            _inembed = ffi.new("int[]", rank)
            _inembed[0:rank] = inembed
        if onembed is None:
            _onembed = ffi.NULL
        else:
            _onembed = ffi.new("int[]", rank)
            _onembed[0:rank] = onembed
        sz = ffi.new("size_t[]", 4)
        err = self._lib.cufftMakePlanMany(self.handle, rank, n, _inembed,
                                          istride, idist, _onembed, ostride,
                                          odist, fft_type, batch, sz)
        if err:
            raise CU.error("cufftMakePlanMany", err)
        self.execute = {
            CUFFT_R2C: self.exec_r2c,
            CUFFT_C2R: self.exec_c2r,
            CUFFT_C2C: self.exec_c2c,
            CUFFT_D2Z: self.exec_d2z,
            CUFFT_Z2D: self.exec_z2d,
            CUFFT_Z2Z: self.exec_z2z
        }.get(fft_type, self._exec_unknown)
        return int(sz[0])
Example #43
    def sgemm_ex(self,
                 transA,
                 transB,
                 rowsCountA,
                 columnCountB,
                 commonSideLength,
                 alpha,
                 A,
                 B,
                 beta,
                 C,
                 strideA=0,
                 strideB=0,
                 strideC=0,
                 dtypeA=CUBLAS_DATA_HALF,
                 dtypeB=CUBLAS_DATA_HALF,
                 dtypeC=CUBLAS_DATA_HALF):
        """Single precision (float) GEneral Matrix Multiplication
        with support of different data types for each matrix.

        Matrices are always in column-major order.

        C = alpha * dot(A, B) + beta * C
        C = alpha * dot(A^T, B) + beta * C
        C = alpha * dot(A, B^T) + beta * C
        C = alpha * dot(A^T, B^T) + beta * C

        alpha, A, B, beta, C can be numpy array, Memory object,
        cffi pointer or int.

        Parameters:
            transA: how matrix A is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            transB: how matrix B is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            rowsCountA: number of rows in matrix A.
            columnCountB: number of columns in matrix B.
            commonSideLength: length of the common side of the matrices.
            alpha: the factor of matrix A.
            A: matrix A.
            B: matrix B.
            beta: the factor of matrix C.
            C: Buffer object storing matrix C.
            strideA: leading dimension of matrix A:
                     transA != CUBLAS_OP_N: >= commonSideLength,
                     else: >= rowsCountA.
            strideB: leading dimension of matrix B:
                     transB != CUBLAS_OP_N: >= columnCountB,
                     else: >= commonSideLength.
            strideC: leading dimension of matrix C: >= rowsCountA.
            dtypeA: data type of matrix A
                    (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                     CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).
            dtypeB: data type of matrix B
                    (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                     CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).
            dtypeC: data type of matrix C
                    (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                     CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).

        Returns:
            None.
        """
        if not strideA:
            strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
        if not strideB:
            strideB = (columnCountB
                       if transB != CUBLAS_OP_N else commonSideLength)
        if not strideC:
            strideC = rowsCountA
        err = self._lib.cublasSgemmEx(self.handle, transA, transB, rowsCountA,
                                      columnCountB, commonSideLength,
                                      CU.extract_ptr(alpha), A, dtypeA,
                                      strideA, B, dtypeB, strideB,
                                      CU.extract_ptr(beta), C, dtypeC, strideC)
        if err:
            raise CU.error("cublasSgemmEx", err)