Code example #1
File: _cublas.py Project: Samsung/cuda4py
    def dgemm(self, transA, transB,
              rowsCountA, columnCountB, commonSideLength,
              alpha, A, B, beta, C,
              strideA=0, strideB=0, strideC=0):
        """Double precision (double) GEneral Matrix Multiplication.

        Matrices are always in column-major order.

        C = alpha * dot(A, B) + beta * C
        C = alpha * dot(A^T, B) + beta * C
        C = alpha * dot(A, B^T) + beta * C
        C = alpha * dot(A^T, B^T) + beta * C

        alpha, A, B, beta and C can each be a numpy array, Memory object,
        cffi pointer or int.

        Parameters:
            transA: how matrix A is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            transB: how matrix B is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            rowsCountA: number of rows in matrix A.
            columnCountB: number of columns in matrix B.
            commonSideLength: length of the common side of the matrices.
            alpha: the factor of matrix A.
            A: matrix A.
            B: matrix B.
            beta: the factor of matrix C.
            C: Buffer object storing matrix C.
            strideA: leading dimension of matrix A:
                     transA != CUBLAS_OP_N: >= commonSideLength,
                     else: >= rowsCountA.
            strideB: leading dimension of matrix B:
                     transB != CUBLAS_OP_N: >= columnCountB,
                     else: >= commonSideLength.
            strideC: leading dimension of matrix C: >= rowsCountA.

        Returns:
            None.
        """
        if not strideA:
            strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
        if not strideB:
            strideB = (columnCountB if transB != CUBLAS_OP_N
                       else commonSideLength)
        if not strideC:
            strideC = rowsCountA
        err = self._lib.cublasDgemm_v2(
            self.handle, transA, transB, rowsCountA, columnCountB,
            commonSideLength, CU.extract_ptr(alpha), A, strideA,
            B, strideB, CU.extract_ptr(beta), C, strideC)
        if err:
            raise CU.error("cublasDgemm_v2", err)
Code example #2
File: _cudnn.py Project: nagyistge/cuda4py
    def transform_tensor(self, alpha, src_desc, src_data,
                         beta, dest_desc, dest_data):
        """Transforms data from one layout to another
        (interleaved to splitted for example).

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: dest_data multiplier (numpy array with one element).
        """
        err = self._lib.cudnnTransformTensor(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            CU.extract_ptr(beta), dest_desc, dest_data)
        if err:
            raise CU.error("cudnnTransformTensor", err)
Code example #3
File: _cudnn.py Project: nagyistge/cuda4py
    def pooling_forward(self, pooling_desc, alpha, src_desc, src_data,
                        beta, dest_desc, dest_data):
        """Does pooling forward propagation.

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: dest_data multiplier (numpy array with one element).
        """
        err = self._lib.cudnnPoolingForward(
            self.handle, pooling_desc, CU.extract_ptr(alpha),
            src_desc, src_data,
            CU.extract_ptr(beta), dest_desc, dest_data)
        if err:
            raise CU.error("cudnnPoolingForward", err)
Code example #4
File: _cudnn.py Project: nagyistge/cuda4py
    def convolution_backward_bias(self, alpha, src_desc, src_data,
                                  beta, dest_desc, dest_data):
        """Computes gradient for the bias.

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: dest_data multiplier (numpy array with one element).
            src_data: error for backpropagation.
            dest_data: gradient for the bias.
        """
        err = self._lib.cudnnConvolutionBackwardBias(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            CU.extract_ptr(beta), dest_desc, dest_data)
        if err:
            raise CU.error("cudnnConvolutionBackwardBias", err)
Code example #5
File: _cudnn.py Project: nagyistge/cuda4py
    def convolution_forward(
            self, alpha, src_desc, src_data, filter_desc, filter_data,
            conv_desc, algo, workspace, workspace_size,
            beta, dest_desc, dest_data):
        """Does convolution forward propagation.

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: dest_data multiplier (numpy array with one element).
        """
        err = self._lib.cudnnConvolutionForward(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            filter_desc, filter_data, conv_desc,
            algo, workspace, workspace_size,
            CU.extract_ptr(beta), dest_desc, dest_data)
        if err:
            raise CU.error("cudnnConvolutionForward", err)
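The algo/workspace/workspace_size triple must describe device scratch memory large enough for the chosen algorithm; in the C API the requirement comes from cudnnGetConvolutionForwardWorkspaceSize. A sketch of the calling pattern follows; the query wrapper and buffer helper names here are hypothetical, and only the convolution_forward signature above is taken from the source.

# Hypothetical query wrapper; the real cuda4py name may differ.
size = cudnn.get_convolution_forward_workspace_size(
    src_desc, filter_desc, conv_desc, dest_desc, algo)
workspace = cu.MemAlloc(ctx, size)  # device scratch (assumed helper)
cudnn.convolution_forward(alpha, src_desc, src_buf,
                          filter_desc, filter_buf, conv_desc,
                          algo, workspace, size,
                          beta, dest_desc, dest_buf)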
Code example #6
File: _cudnn.py Project: nagyistge/cuda4py
    def pooling_backward(self, pooling_desc, alpha, output_desc, output_data,
                         diff_desc, diff_data, input_desc, input_data,
                         beta, grad_desc, grad_data):
        """Does pooling backward propagation.

        Parameters:
            alpha: diff_data multiplier (numpy array with one element).
            beta: grad_data multiplier (numpy array with one element).
            output_data: output of the forward propagation.
            diff_data: error for backpropagation.
            input_data: input of the forward propagation.
            grad_data: backpropagated error.
        """
        err = self._lib.cudnnPoolingBackward(
            self.handle, pooling_desc, CU.extract_ptr(alpha),
            output_desc, output_data,
            diff_desc, diff_data, input_desc, input_data,
            CU.extract_ptr(beta), grad_desc, grad_data)
        if err:
            raise CU.error("cudnnPoolingBackward", err)
Code example #7
File: _cudnn.py Project: nagyistge/cuda4py
    def convolution_backward_data(
            self, alpha, filter_desc, filter_data, diff_desc, diff_data,
            conv_desc, beta, grad_desc, grad_data,
            algo=None, workspace=None, workspace_size=0):
        """Computes backpropagated error.

        Parameters:
            alpha: diff_data multiplier (numpy array with one element).
            beta: grad_data multiplier (numpy array with one element).
            filter_data: convolutional kernels.
            diff_data: error for backpropagation.
            grad_data: backpropagated error.
        """
        if self.version < 4000:
            err = self._lib.cudnnConvolutionBackwardData(
                self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
                diff_desc, diff_data, conv_desc,
                CU.extract_ptr(beta), grad_desc, grad_data)
        elif algo is None:
            err = self._lib.cudnnConvolutionBackwardData_v2(
                self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
                diff_desc, diff_data, conv_desc,
                CU.extract_ptr(beta), grad_desc, grad_data)
        else:
            err = self._lib.cudnnConvolutionBackwardData(
                self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
                diff_desc, diff_data, conv_desc,
                algo, workspace, workspace_size,
                CU.extract_ptr(beta), grad_desc, grad_data)
        if err:
            raise CU.error("cudnnConvolutionBackwardData", err)
Code example #8
File: _cudnn.py Project: nagyistge/cuda4py
    def convolution_backward_filter(
            self, alpha, src_desc, src_data, diff_desc, diff_data, conv_desc,
            beta, grad_desc, grad_data,
            algo=None, workspace=None, workspace_size=0):
        """Computes gradient for the convolutional kernels.

        Parameters:
            alpha: src_data multiplier (numpy array with one element).
            beta: grad_data multiplier (numpy array with one element).
            src_data: input from the forward pass.
            diff_data: error for backpropagation.
            grad_data: gradient for convolutional kernels.
        """
        if self.version < 4000:
            err = self._lib.cudnnConvolutionBackwardFilter(
                self.handle, CU.extract_ptr(alpha), src_desc, src_data,
                diff_desc, diff_data, conv_desc,
                CU.extract_ptr(beta), grad_desc, grad_data)
        elif algo is None:
            err = self._lib.cudnnConvolutionBackwardFilter_v2(
                self.handle, CU.extract_ptr(alpha), src_desc, src_data,
                diff_desc, diff_data, conv_desc,
                CU.extract_ptr(beta), grad_desc, grad_data)
        else:
            err = self._lib.cudnnConvolutionBackwardFilter(
                self.handle, CU.extract_ptr(alpha), src_desc, src_data,
                diff_desc, diff_data, conv_desc,
                algo, workspace, workspace_size,
                CU.extract_ptr(beta), grad_desc, grad_data)
        if err:
            raise CU.error("cudnnConvolutionBackwardFilter", err)
Code example #9
File: _cublas.py Project: Samsung/cuda4py
    def sgemm_ex(self, transA, transB,
                 rowsCountA, columnCountB, commonSideLength,
                 alpha, A, B, beta, C,
                 strideA=0, strideB=0, strideC=0,
                 dtypeA=CUBLAS_DATA_HALF, dtypeB=CUBLAS_DATA_HALF,
                 dtypeC=CUBLAS_DATA_HALF):
        """Single precision (float) GEneral Matrix Multiplication
        with support of different data types for each matrix.

        Matrices are always in column-major order.

        C = alpha * dot(A, B) + beta * C
        C = alpha * dot(A^T, B) + beta * C
        C = alpha * dot(A, B^T) + beta * C
        C = alpha * dot(A^T, B^T) + beta * C

        alpha, A, B, beta and C can each be a numpy array, Memory object,
        cffi pointer or int.

        Parameters:
            transA: how matrix A is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            transB: how matrix B is to be transposed
                    (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
            rowsCountA: number of rows in matrix A.
            columnCountB: number of columns in matrix B.
            commonSideLength: length of the common side of the matrices.
            alpha: the factor of matrix A.
            A: matrix A.
            B: matrix B.
            beta: the factor of matrix C.
            C: Buffer object storing matrix C.
            strideA: leading dimension of matrix A:
                     transA != CUBLAS_OP_N: >= commonSideLength,
                     else: >= rowsCountA.
            strideB: leading dimension of matrix B:
                     transB != CUBLAS_OP_N: >= columnCountB,
                     else: >= commonSideLength.
            strideC: leading dimension of matrix C: >= rowsCountA.
            dtypeA: data type of matrix A
                    (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                     CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).
            dtypeB: data type of matrix B
                    (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                     CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).
            dtypeC: data type of matrix C
                    (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                     CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).

        Returns:
            None.
        """
        if not strideA:
            strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
        if not strideB:
            strideB = (columnCountB if transB != CUBLAS_OP_N
                       else commonSideLength)
        if not strideC:
            strideC = rowsCountA
        err = self._lib.cublasSgemmEx(
            self.handle, transA, transB, rowsCountA, columnCountB,
            commonSideLength, CU.extract_ptr(alpha), A, dtypeA, strideA,
            B, dtypeB, strideB, CU.extract_ptr(beta), C, dtypeC, strideC)
        if err:
            raise CU.error("cublasSgemmEx", err)