コード例 #1
0
ファイル: _clblas.py プロジェクト: ernwa/opencl4py2
 def __init__(self):
     """Set up clBLAS and keep a reference to the library on the instance.

     Raises:
         CLRuntimeError: if clblasSetup() returns a non-zero error code.
     """
     # Assigned before anything can fail, so the attribute always exists.
     self._lib = None
     # NOTE(review): initialize() is defined outside this snippet; presumably
     # it is what makes the module-level `lib` usable - confirm.
     initialize()
     status = lib.clblasSetup()
     if status:
         reason = CL.get_error_description(status)
         raise CLRuntimeError(
             "clblasSetup() failed with error %s" % reason, status)
     self._lib = lib  # hold the reference only once setup has succeeded
コード例 #2
0
ファイル: _clblas.py プロジェクト: ernwa/opencl4py2
    def dgemm(self, queues, order, transA, transB,
              rowsCountA, columnCountB, commonSideLength,
              alpha, A, B, beta, C,
              offsetA=0, strideA=0,
              offsetB=0, strideB=0,
              offsetC=0, strideC=0,
              wait_for=None, need_event=False):
        """Enqueue a double precision general matrix multiplication (DGEMM).

        Depending on transA / transB, one of the following is computed:

            C = alpha * dot(A, B) + beta * C
            C = alpha * dot(A^T, B) + beta * C
            C = alpha * dot(A, B^T) + beta * C
            C = alpha * dot(A^T, B^T) + beta * C

        Parameters:
            queues: list of Queue objects the operation is enqueued on;
                    the ``handle`` of each one is passed to clBLAS.
            order: storage order (clblasRowMajor or clblasColumnMajor).
            transA: transposition applied to A
                    (clblasNoTrans, clblasTrans, clblasConjTrans).
            transB: transposition applied to B
                    (clblasNoTrans, clblasTrans, clblasConjTrans).
            rowsCountA: number of rows in matrix A.
            columnCountB: number of columns in matrix B.
            commonSideLength: length of the side shared by the matrices.
            alpha: scalar factor applied to the product term.
            A: Buffer object holding matrix A.
            B: Buffer object holding matrix B.
            beta: scalar factor applied to matrix C.
            C: Buffer object holding matrix C.
            offsetA: offset (in elements) of the first element of A
                     inside its buffer.
            strideA: leading dimension of A. A falsy value (the default 0)
                     selects commonSideLength when A is
                     (clblasNoTrans, clblasRowMajor) or
                     (transposed, clblasColumnMajor), otherwise rowsCountA.
                     An explicit value must be at least that large.
            offsetB: offset (in elements) of the first element of B
                     inside its buffer.
            strideB: leading dimension of B. A falsy value (the default 0)
                     selects columnCountB when B is
                     (clblasNoTrans, clblasRowMajor) or
                     (transposed, clblasColumnMajor), otherwise
                     commonSideLength. An explicit value must be at least
                     that large.
            offsetC: offset (in elements) of the first element of C
                     inside its buffer.
            strideC: leading dimension of C. A falsy value (the default 0)
                     selects columnCountB for clblasRowMajor, otherwise
                     rowsCountA. An explicit value must be at least that
                     large.
            wait_for: list of Event objects to wait for.
            need_event: whether an Event object should be returned.

        Returns:
            Event object, or None if need_event == False.

        Raises:
            CLRuntimeError: if clblasDgemm() returns a non-zero error code.
        """
        # One-element array that receives the resulting event, or NULL when
        # the caller did not ask for one.
        if need_event:
            event = ffi.new("cl_event[]", 1)
        else:
            event = clffi.ffi.NULL
        wait_list, n_events = CL.get_wait_list(wait_for)

        # Copy the raw queue handles into a C array for the native call.
        queue_count = len(queues)
        queue_handles = ffi.new("cl_command_queue[]", queue_count)
        for index, queue in enumerate(queues):
            queue_handles[index] = queue.handle

        # Fill in every leading dimension the caller left at its falsy
        # default.
        if not strideA:
            a_leads_with_common_side = (
                (transA == clblasNoTrans and order == clblasRowMajor) or
                (transA != clblasNoTrans and order == clblasColumnMajor))
            if a_leads_with_common_side:
                strideA = commonSideLength
            else:
                strideA = rowsCountA
        if not strideB:
            b_leads_with_columns = (
                (transB == clblasNoTrans and order == clblasRowMajor) or
                (transB != clblasNoTrans and order == clblasColumnMajor))
            if b_leads_with_columns:
                strideB = columnCountB
            else:
                strideB = commonSideLength
        if not strideC:
            if order == clblasRowMajor:
                strideC = columnCountB
            else:
                strideC = rowsCountA

        status = self._lib.clblasDgemm(
            order, transA, transB,
            rowsCountA, columnCountB, commonSideLength,
            alpha,
            A.handle, offsetA, strideA,
            B.handle, offsetB, strideB,
            beta,
            C.handle, offsetC, strideC,
            queue_count, queue_handles,
            n_events, wait_list, event)
        if status:
            reason = CL.get_error_description(status)
            raise CLRuntimeError(
                "clblasDgemm() failed with error %s" % reason, status)

        if event != clffi.ffi.NULL:
            return Event(event[0])
        return None