def __init__(self):
    """Set up the clBLAS library and keep a reference to it.

    Calls the module-level initialize() and then clblasSetup().

    Raises:
        CLRuntimeError: if clblasSetup() returns a non-zero error code.
    """
    # Define the attribute before anything can fail, so that it exists
    # even when setup raises below.
    self._lib = None

    initialize()
    status = lib.clblasSetup()
    if status:
        description = CL.get_error_description(status)
        raise CLRuntimeError(
            "clblasSetup() failed with error %s" % description, status)

    # Hold on to the library object for as long as this instance lives.
    self._lib = lib
def dgemm(self, queues, order, transA, transB,
          rowsCountA, columnCountB, commonSideLength,
          alpha, A, B, beta, C,
          offsetA=0, strideA=0, offsetB=0, strideB=0,
          offsetC=0, strideC=0, wait_for=None, need_event=False):
    """Enqueue a double precision GEneral Matrix Multiplication.

    Depending on transA / transB, one of the following is computed:

        C = alpha * dot(A, B) + beta * C
        C = alpha * dot(A^T, B) + beta * C
        C = alpha * dot(A, B^T) + beta * C
        C = alpha * dot(A^T, B^T) + beta * C

    Parameters:
        queues: list of the Queue objects on which this operation
                will be enqueued.
        order: row/column order (clblasRowMajor, clblasColumnMajor).
        transA: how matrix A is to be transposed
                (clblasNoTrans, clblasTrans, clblasConjTrans).
        transB: how matrix B is to be transposed
                (clblasNoTrans, clblasTrans, clblasConjTrans).
        rowsCountA: number of rows in matrix A.
        columnCountB: number of columns in matrix B.
        commonSideLength: length of the common side of the matrices.
        alpha: the factor of matrix A.
        A: Buffer object storing matrix A.
        B: Buffer object storing matrix B.
        beta: the factor of matrix C.
        C: Buffer object storing matrix C.
        offsetA: offset of the first element of the matrix A
                 in the buffer object, counted in elements.
        strideA: leading dimension of matrix A; a falsy value (the
                 default 0) selects commonSideLength for
                 (clblasNoTrans, clblasRowMajor) or
                 (transposed, clblasColumnMajor), else rowsCountA.
        offsetB: offset of the first element of the matrix B
                 in the buffer object, counted in elements.
        strideB: leading dimension of matrix B; a falsy value (the
                 default 0) selects columnCountB for
                 (clblasNoTrans, clblasRowMajor) or
                 (transposed, clblasColumnMajor), else commonSideLength.
        offsetC: offset of the first element of the matrix C
                 in the buffer object, counted in elements.
        strideC: leading dimension of matrix C; a falsy value (the
                 default 0) selects columnCountB for clblasRowMajor,
                 else rowsCountA.
        wait_for: list of the Event objects to wait.
        need_event: return Event object or not.

    Returns:
        Event object or None if need_event == False.

    Raises:
        CLRuntimeError: if clblasDgemm() returns a non-zero error code.
    """
    def stored_as_given(trans):
        # True for (clblasNoTrans, clblasRowMajor) and for
        # (any transposition, clblasColumnMajor); False otherwise.
        if trans == clblasNoTrans:
            return order == clblasRowMajor
        return order == clblasColumnMajor

    # Output slot for the completion event, only when one was requested.
    if need_event:
        event = ffi.new("cl_event[]", 1)
    else:
        event = clffi.ffi.NULL

    wait_list, n_events = CL.get_wait_list(wait_for)

    # Copy the native queue handles into a C array for the call.
    n_queues = len(queues)
    queue_handles = ffi.new("cl_command_queue[]", n_queues)
    for index, queue in enumerate(queues):
        queue_handles[index] = queue.handle

    # Fill in any leading dimension the caller left unspecified.
    if not strideA:
        if stored_as_given(transA):
            strideA = commonSideLength
        else:
            strideA = rowsCountA
    if not strideB:
        if stored_as_given(transB):
            strideB = columnCountB
        else:
            strideB = commonSideLength
    if not strideC:
        if order == clblasRowMajor:
            strideC = columnCountB
        else:
            strideC = rowsCountA

    status = self._lib.clblasDgemm(
        order, transA, transB,
        rowsCountA, columnCountB, commonSideLength,
        alpha,
        A.handle, offsetA, strideA,
        B.handle, offsetB, strideB,
        beta,
        C.handle, offsetC, strideC,
        n_queues, queue_handles,
        n_events, wait_list, event)
    if status:
        description = CL.get_error_description(status)
        raise CLRuntimeError(
            "clblasDgemm() failed with error %s" % description, status)

    if event == clffi.ffi.NULL:
        return None
    return Event(event[0])