def dgemm(self, transA, transB,
          rowsCountA, columnCountB, commonSideLength,
          alpha, A, B, beta, C,
          strideA=0, strideB=0, strideC=0):
    """Double precision (double) GEneral Matrix Multiplication.

    Matrices are always in column order.

    C = alpha * dot(A, B) + beta * C
    C = alpha * dot(A^T, B) + beta * C
    C = alpha * dot(A, B^T) + beta * C
    C = alpha * dot(A^T, B^T) + beta * C

    alpha, A, B, beta, C can be numpy array, Memory object,
    cffi pointer or int.

    Parameters:
        transA: how matrix A is to be transposed
                (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
        transB: how matrix B is to be transposed
                (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
        rowsCountA: number of rows in matrix A.
        columnCountB: number of columns in matrix B.
        commonSideLength: length of the common side of the matrices.
        alpha: the factor of matrix A.
        A: matrix A.
        B: matrix B.
        beta: the factor of matrix C.
        C: Buffer object storing matrix C.
        strideA: leading dimension of matrix A:
                 if transA != CUBLAS_OP_N: >= commonSideLength,
                 else: >= rowsCountA.
        strideB: leading dimension of matrix B:
                 if transB != CUBLAS_OP_N: >= columnCountB,
                 else: >= commonSideLength.
        strideC: leading dimension of matrix C: >= rowsCountA.

    Returns:
        None.
    """
    if not strideA:
        strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
    if not strideB:
        strideB = (columnCountB if transB != CUBLAS_OP_N
                   else commonSideLength)
    if not strideC:
        strideC = rowsCountA
    err = self._lib.cublasDgemm_v2(
        self.handle, transA, transB,
        rowsCountA, columnCountB, commonSideLength,
        CU.extract_ptr(alpha), A, strideA, B, strideB,
        CU.extract_ptr(beta), C, strideC)
    if err:
        raise CU.error("cublasDgemm_v2", err)
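# Usage sketch for dgemm (illustrative, not part of this module's API):
# `blas` stands for an instance of this class, and a_buf, b_buf, c_buf for
# device buffers allocated elsewhere -- these names are assumptions. Since
# cuBLAS expects column-major storage, a row-major C = dot(A, B) can be
# computed as C^T = dot(B^T, A^T) by simply swapping the operands:
#
#   import numpy
#   alpha = numpy.ones(1, dtype=numpy.float64)
#   beta = numpy.zeros(1, dtype=numpy.float64)
#   # A is (m, k), B is (k, n), C is (m, n), all row-major:
#   blas.dgemm(CUBLAS_OP_N, CUBLAS_OP_N, n, m, k,
#              alpha, b_buf, a_buf, beta, c_buf)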
def transform_tensor(self, alpha, src_desc, src_data,
                     beta, dest_desc, dest_data):
    """Transforms data from one layout to another
    (for example, from interleaved to planar).

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: dest_data multiplier (numpy array with one element).
    """
    err = self._lib.cudnnTransformTensor(
        self.handle, CU.extract_ptr(alpha), src_desc, src_data,
        CU.extract_ptr(beta), dest_desc, dest_data)
    if err:
        raise CU.error("cudnnTransformTensor", err)
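# Usage sketch for transform_tensor (illustrative; descriptor creation is
# assumed to happen elsewhere through this library's cuDNN descriptor
# helpers, which are not shown here). A typical use is converting between
# an interleaved (NHWC) and a planar (NCHW) layout, with the layouts
# described by the two tensor descriptors. All names are assumptions:
#
#   import numpy
#   one = numpy.ones(1, dtype=numpy.float32)
#   zero = numpy.zeros(1, dtype=numpy.float32)
#   cudnn.transform_tensor(one, nhwc_desc, src_buf, zero, nchw_desc, dst_buf)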
def pooling_forward(self, pooling_desc, alpha, src_desc, src_data,
                    beta, dest_desc, dest_data):
    """Does pooling forward propagation.

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: dest_data multiplier (numpy array with one element).
    """
    err = self._lib.cudnnPoolingForward(
        self.handle, pooling_desc, CU.extract_ptr(alpha),
        src_desc, src_data,
        CU.extract_ptr(beta), dest_desc, dest_data)
    if err:
        raise CU.error("cudnnPoolingForward", err)
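# Usage sketch for pooling_forward (illustrative): following the usual cuDNN
# blend convention, beta = 0 overwrites dest_data and beta = 1 accumulates
# into it. `cudnn`, `pool_desc`, the descriptors and buffers are assumed to
# be created elsewhere:
#
#   import numpy
#   one = numpy.ones(1, dtype=numpy.float32)
#   zero = numpy.zeros(1, dtype=numpy.float32)
#   cudnn.pooling_forward(pool_desc, one, x_desc, x_buf, zero, y_desc, y_buf)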
def convolution_backward_bias(self, alpha, src_desc, src_data,
                              beta, dest_desc, dest_data):
    """Computes gradient for the bias.

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: dest_data multiplier (numpy array with one element).
        src_data: error for backpropagation.
        dest_data: gradient for the bias.
    """
    err = self._lib.cudnnConvolutionBackwardBias(
        self.handle, CU.extract_ptr(alpha), src_desc, src_data,
        CU.extract_ptr(beta), dest_desc, dest_data)
    if err:
        raise CU.error("cudnnConvolutionBackwardBias", err)
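# Note (general cuDNN behavior, not specific to this wrapper): the bias
# gradient is the sum of the incoming error over the batch and spatial
# dimensions, so dest_desc typically describes a (1, C, 1, 1) tensor.
# Sketch with assumed names:
#
#   cudnn.convolution_backward_bias(one, dy_desc, dy_buf,
#                                   zero, db_desc, db_buf)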
def convolution_forward(
        self, alpha, src_desc, src_data, filter_desc, filter_data,
        conv_desc, algo, workspace, workspace_size,
        beta, dest_desc, dest_data):
    """Does convolution forward propagation.

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: dest_data multiplier (numpy array with one element).
    """
    err = self._lib.cudnnConvolutionForward(
        self.handle, CU.extract_ptr(alpha), src_desc, src_data,
        filter_desc, filter_data, conv_desc,
        algo, workspace, workspace_size,
        CU.extract_ptr(beta), dest_desc, dest_data)
    if err:
        raise CU.error("cudnnConvolutionForward", err)
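# Usage sketch for convolution_forward (illustrative). The workspace has to
# be at least as large as cuDNN reports for the chosen algo (via cuDNN's
# cudnnGetConvolutionForwardWorkspaceSize); how that size is queried through
# this wrapper is not shown here. All names below are assumptions:
#
#   cudnn.convolution_forward(
#       one, x_desc, x_buf, w_desc, w_buf, conv_desc,
#       algo, workspace_buf, workspace_size,
#       zero, y_desc, y_buf)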
def pooling_backward(self, pooling_desc, alpha, output_desc, output_data,
                     diff_desc, diff_data, input_desc, input_data,
                     beta, grad_desc, grad_data):
    """Does pooling backward propagation.

    Parameters:
        alpha: diff_data multiplier (numpy array with one element).
        beta: grad_data multiplier (numpy array with one element).
        output: output of the forward propagation.
        diff: error for backpropagation.
        input: input of the forward propagation.
        grad: backpropagated error.
    """
    err = self._lib.cudnnPoolingBackward(
        self.handle, pooling_desc, CU.extract_ptr(alpha),
        output_desc, output_data, diff_desc, diff_data,
        input_desc, input_data,
        CU.extract_ptr(beta), grad_desc, grad_data)
    if err:
        raise CU.error("cudnnPoolingBackward", err)
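# Argument cheat sheet for pooling_backward, in cuDNN terms:
# output = y (forward output), diff = dy (incoming error),
# input = x (forward input), grad = dx (error to propagate back).
# Sketch with assumed names:
#
#   cudnn.pooling_backward(pool_desc, one, y_desc, y_buf, dy_desc, dy_buf,
#                          x_desc, x_buf, zero, dx_desc, dx_buf)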
def convolution_backward_data(
        self, alpha, filter_desc, filter_data, diff_desc, diff_data,
        conv_desc, beta, grad_desc, grad_data,
        algo=None, workspace=None, workspace_size=0):
    """Computes backpropagated error.

    Parameters:
        alpha: diff_data multiplier (numpy array with one element).
        beta: grad_data multiplier (numpy array with one element).
        filter_data: convolutional kernels.
        diff_data: error for backpropagation.
        grad_data: backpropagated error.
    """
    if self.version < 4000:
        err = self._lib.cudnnConvolutionBackwardData(
            self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
            diff_desc, diff_data, conv_desc,
            CU.extract_ptr(beta), grad_desc, grad_data)
    elif algo is None:
        err = self._lib.cudnnConvolutionBackwardData_v2(
            self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
            diff_desc, diff_data, conv_desc,
            CU.extract_ptr(beta), grad_desc, grad_data)
    else:
        err = self._lib.cudnnConvolutionBackwardData(
            self.handle, CU.extract_ptr(alpha), filter_desc, filter_data,
            diff_desc, diff_data, conv_desc,
            algo, workspace, workspace_size,
            CU.extract_ptr(beta), grad_desc, grad_data)
    if err:
        raise CU.error("cudnnConvolutionBackwardData", err)
def convolution_backward_filter(
        self, alpha, src_desc, src_data, diff_desc, diff_data,
        conv_desc, beta, grad_desc, grad_data,
        algo=None, workspace=None, workspace_size=0):
    """Computes gradient for the convolutional kernels.

    Parameters:
        alpha: src_data multiplier (numpy array with one element).
        beta: grad_data multiplier (numpy array with one element).
        src_data: input from the forward pass.
        diff_data: error for backpropagation.
        grad_data: gradient for convolutional kernels.
    """
    if self.version < 4000:
        err = self._lib.cudnnConvolutionBackwardFilter(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            diff_desc, diff_data, conv_desc,
            CU.extract_ptr(beta), grad_desc, grad_data)
    elif algo is None:
        err = self._lib.cudnnConvolutionBackwardFilter_v2(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            diff_desc, diff_data, conv_desc,
            CU.extract_ptr(beta), grad_desc, grad_data)
    else:
        err = self._lib.cudnnConvolutionBackwardFilter(
            self.handle, CU.extract_ptr(alpha), src_desc, src_data,
            diff_desc, diff_data, conv_desc,
            algo, workspace, workspace_size,
            CU.extract_ptr(beta), grad_desc, grad_data)
    if err:
        raise CU.error("cudnnConvolutionBackwardFilter", err)
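# Combined backward-pass sketch covering convolution_backward_filter and
# convolution_backward_data (the bias gradient is computed the same way,
# see above). Leaving algo=None keeps the pre-cuDNN-v4 code path; passing
# an algo together with a workspace uses the v4+ entry points. All names
# are assumptions:
#
#   # Gradient w.r.t. the filters:
#   cudnn.convolution_backward_filter(one, x_desc, x_buf, dy_desc, dy_buf,
#                                     conv_desc, zero, dw_desc, dw_buf)
#   # Gradient w.r.t. the input (backpropagated error):
#   cudnn.convolution_backward_data(one, w_desc, w_buf, dy_desc, dy_buf,
#                                   conv_desc, zero, dx_desc, dx_buf)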
def sgemm_ex(self, transA, transB,
             rowsCountA, columnCountB, commonSideLength,
             alpha, A, B, beta, C,
             strideA=0, strideB=0, strideC=0,
             dtypeA=CUBLAS_DATA_HALF, dtypeB=CUBLAS_DATA_HALF,
             dtypeC=CUBLAS_DATA_HALF):
    """Single precision (float) GEneral Matrix Multiplication
    with support of different data types for each matrix.

    Matrices are always in column order.

    C = alpha * dot(A, B) + beta * C
    C = alpha * dot(A^T, B) + beta * C
    C = alpha * dot(A, B^T) + beta * C
    C = alpha * dot(A^T, B^T) + beta * C

    alpha, A, B, beta, C can be numpy array, Memory object,
    cffi pointer or int.

    Parameters:
        transA: how matrix A is to be transposed
                (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
        transB: how matrix B is to be transposed
                (CUBLAS_OP_N, CUBLAS_OP_T, CUBLAS_OP_C).
        rowsCountA: number of rows in matrix A.
        columnCountB: number of columns in matrix B.
        commonSideLength: length of the common side of the matrices.
        alpha: the factor of matrix A.
        A: matrix A.
        B: matrix B.
        beta: the factor of matrix C.
        C: Buffer object storing matrix C.
        strideA: leading dimension of matrix A:
                 if transA != CUBLAS_OP_N: >= commonSideLength,
                 else: >= rowsCountA.
        strideB: leading dimension of matrix B:
                 if transB != CUBLAS_OP_N: >= columnCountB,
                 else: >= commonSideLength.
        strideC: leading dimension of matrix C: >= rowsCountA.
        dtypeA: data type of matrix A
                (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                 CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).
        dtypeB: data type of matrix B
                (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                 CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).
        dtypeC: data type of matrix C
                (CUBLAS_DATA_FLOAT, CUBLAS_DATA_DOUBLE,
                 CUBLAS_DATA_HALF, CUBLAS_DATA_INT8).

    Returns:
        None.
    """
    if not strideA:
        strideA = commonSideLength if transA != CUBLAS_OP_N else rowsCountA
    if not strideB:
        strideB = (columnCountB if transB != CUBLAS_OP_N
                   else commonSideLength)
    if not strideC:
        strideC = rowsCountA
    err = self._lib.cublasSgemmEx(
        self.handle, transA, transB,
        rowsCountA, columnCountB, commonSideLength,
        CU.extract_ptr(alpha), A, dtypeA, strideA, B, dtypeB, strideB,
        CU.extract_ptr(beta), C, dtypeC, strideC)
    if err:
        raise CU.error("cublasSgemmEx", err)
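# Note on sgemm_ex: the storage types of A, B and C may be reduced (the
# defaults here are CUBLAS_DATA_HALF), but cublasSgemmEx computes in single
# precision, so alpha and beta should be float32 scalars. Sketch with the
# same assumed names as the dgemm example above:
#
#   import numpy
#   alpha = numpy.ones(1, dtype=numpy.float32)
#   beta = numpy.zeros(1, dtype=numpy.float32)
#   blas.sgemm_ex(CUBLAS_OP_N, CUBLAS_OP_N, n, m, k,
#                 alpha, b_buf, a_buf, beta, c_buf)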