def dgemm(disco, transA, transB, m, n, k, alpha, A, B, beta, C, maxTotalBlocks=128):
    """Compute the general matrix product alpha*op(A)*op(B) + beta*C in double
    precision on a Disco cluster, where op(X) = X or transpose(X).

    The matrices are partitioned into a grid of blocks; map jobs scatter the
    elements of A, B and C into the blocks that need them, and a reduce job
    multiplies/accumulates each block with scipy sparse matrices.

    @param disco A Disco instance used to run the map/reduce jobs.
    @param transA Boolean: transpose matrix A before multiplying.
    @param transB Boolean: transpose matrix B before multiplying.
    @param m Number of rows of matrix op(A) and C.
    @param n Number of columns of matrix op(B) and C.
    @param k Number of columns of matrix op(A) and rows of matrix op(B).
    @param alpha Scalar multiplier for the matrix product op(A)*op(B).
    @param A MatrixWrapper object encapsulating matrix A.
    @param B MatrixWrapper object encapsulating matrix B.
    @param beta Scalar multiplier for matrix C.
    @param C MatrixWrapper object encapsulating matrix C.  If there is no C
           term, pass an empty wrapper, MatrixWrapper(), as a placeholder.
    @param maxTotalBlocks Suggested number of matrix blocks to use for carrying
           out the multiplication.  Ideally this equals the number of cores
           available in the cluster; the actual block count is chosen from the
           matrix size by _partition().
    @return MatrixWrapper object encapsulating the resulting matrix.
    """

    def _mapRowBlocks(e, params):
        """Scatter elements of op(A), scaled by alpha, to every block in their
        block row (each product block needs the whole row of A blocks)."""
        from math import ceil
        from numpy import float64
        if isinstance(e, tuple):
            # chain_reader yields (key, value) pairs; the payload is the key
            e = e[0]
        output = []
        for elem in e.split(";"):
            i, j, val = map(float64, elem.split(","))
            if params.transA:
                i, j = j, i
            assert i < params.m, "row index %d exceeds matrix dimensions" % int(i)
            assert j < params.k, "col index %d exceeds matrix dimensions" % int(j)
            blockY = int(i / params.blockHeight)
            # row offset of this block in the original matrix
            offsetY = ceil(params.blockHeight * blockY)
            val = params.alpha * val
            if val != 0.0:
                # emit one copy per block column; key = linear block id
                output += [(blockY * params.blocksPerRow + x,
                            "%s,%d,%d,%.14f" % (params.matrixId,
                                                int(i - offsetY), int(j), val))
                           for x in range(0, params.blocksPerRow)]
        return output

    def _mapColBlocks(e, params):
        """Scatter elements of op(B) to every block in their block column
        (each product block needs the whole column of B blocks)."""
        from math import ceil
        from numpy import float64
        if isinstance(e, tuple):
            e = e[0]
        output = []
        for elem in e.split(";"):
            i, j, val = map(float64, elem.split(","))
            if params.transB:
                i, j = j, i
            assert i < params.k, "row index %d exceeds matrix dimensions" % int(i)
            assert j < params.n, "col index %d exceeds matrix dimensions" % int(j)
            blockX = int(j / params.blockWidth)
            # column offset of this block in the original matrix
            offsetX = ceil(params.blockWidth * blockX)
            if val != 0.0:
                output += [(y * params.blocksPerRow + blockX,
                            "%s,%d,%d,%.14f" % (params.matrixId,
                                                int(i), int(j - offsetX), val))
                           for y in range(0, params.blocksPerCol)]
        return output

    def _mapBlocks(e, params):
        """Scatter elements of C, scaled by beta, to exactly the one block
        that contains them."""
        from math import ceil
        from numpy import float64
        if isinstance(e, tuple):
            e = e[0]
        output = []
        for elem in e.split(";"):
            i, j, val = map(float64, elem.split(","))
            assert i < params.m, "row index %d exceeds matrix dimensions" % int(i)
            assert j < params.n, "col index %d exceeds matrix dimensions" % int(j)
            blockX = int(j / params.blockWidth)
            blockY = int(i / params.blockHeight)
            offsetX = ceil(params.blockWidth * blockX)
            offsetY = ceil(params.blockHeight * blockY)
            val = params.beta * val
            if val != 0.0:
                output.append((blockY * params.blocksPerRow + blockX,
                               "%s,%d,%d,%.14f" % (params.matrixId,
                                                   int(i - offsetY),
                                                   int(j - offsetX), val)))
        return output

    def nop_map(e, params):
        """Identity map used to feed the already-keyed block data into the
        multiply/accumulate reduce phase."""
        return [e]

    def _reduceMultiplyAndAdd(iter, out, params):
        """For each block id, assemble the sparse A, B and C sub-blocks and
        emit the entries of A*B + C mapped back to original matrix indices.

        Incoming values are "matrixId,row,col,value" strings keyed by block id;
        alpha and beta were already folded into the A and C values by the
        mappers."""
        from numpy import float64
        rows = {}
        cols = {}
        vals = {}
        maxColIdx = {}
        maxRowIdx = {}
        for blockId, s in iter:
            blockId = int(blockId)
            matrixId, rowIdx, colIdx, val = s.split(",")
            rowIdx = int(rowIdx)
            colIdx = int(colIdx)
            val = float64(val)
            if blockId not in rows:
                rows[blockId] = {}
                cols[blockId] = {}
                vals[blockId] = {}
                maxColIdx[blockId] = {}
                maxRowIdx[blockId] = {}
            if matrixId not in rows[blockId]:
                rows[blockId][matrixId] = []
                cols[blockId][matrixId] = []
                vals[blockId][matrixId] = []
                maxColIdx[blockId][matrixId] = 0
                maxRowIdx[blockId][matrixId] = 0
            rows[blockId][matrixId].append(rowIdx)
            cols[blockId][matrixId].append(colIdx)
            vals[blockId][matrixId].append(val)
            # track sub-block extents so the sparse matrices can be sized
            maxColIdx[blockId][matrixId] = max(maxColIdx[blockId][matrixId], colIdx)
            maxRowIdx[blockId][matrixId] = max(maxRowIdx[blockId][matrixId], rowIdx)
        # initialize sparse matrices
        from math import ceil
        from scipy.sparse import coo_matrix
        # NOTE(review): coo_matrix's `dims=` keyword is the legacy scipy
        # spelling of `shape=`; kept for the scipy version this code targets.
        for blockId in rows:
            # compute the index offset in the original matrix
            # (floor division: blockId is a linear row-major block index)
            blockY = blockId // params.blocksPerRow
            blockX = blockId % params.blocksPerRow
            offsetY = ceil(params.blockHeight * blockY)
            offsetX = ceil(params.blockWidth * blockX)
            # compute matrix product
            if 'A' not in vals[blockId] or 'B' not in vals[blockId]:
                # skip multiplication since either block A or B is empty
                if 'C' in vals[blockId]:
                    # return beta*C (beta already applied by _mapBlocks)
                    P = coo_matrix((vals[blockId]['C'],
                                    (rows[blockId]['C'], cols[blockId]['C'])),
                                   dtype=float64,
                                   dims=(maxRowIdx[blockId]['C'] + 1,
                                         maxColIdx[blockId]['C'] + 1))
                else:
                    P = None
            else:
                if 'C' in vals[blockId]:
                    m = max(maxRowIdx[blockId]['A'], maxRowIdx[blockId]['C']) + 1
                    n = max(maxColIdx[blockId]['B'], maxColIdx[blockId]['C']) + 1
                    C = coo_matrix((vals[blockId]['C'],
                                    (rows[blockId]['C'], cols[blockId]['C'])),
                                   dtype=float64, dims=(m, n))
                else:
                    m = maxRowIdx[blockId]['A'] + 1
                    n = maxColIdx[blockId]['B'] + 1
                    C = coo_matrix(([], ([], [])), dtype=float64, dims=(m, n))
                # inner dimension must agree between A's columns and B's rows
                A = coo_matrix((vals[blockId]['A'],
                                (rows[blockId]['A'], cols[blockId]['A'])),
                               dtype=float64,
                               dims=(m, max(maxColIdx[blockId]['A'],
                                            maxRowIdx[blockId]['B']) + 1))
                B = coo_matrix((vals[blockId]['B'],
                                (rows[blockId]['B'], cols[blockId]['B'])),
                               dtype=float64,
                               dims=(max(maxColIdx[blockId]['A'],
                                         maxRowIdx[blockId]['B']) + 1, n))
                P = (A * B + C).tocoo()
            # map block indices into original indices
            if P is not None:
                start = 0
                while start < len(P.row):
                    # emit at most elemsPerLine entries per output line
                    end = min(start + params.elemsPerLine, len(P.row))
                    out.add(";".join(["%d,%d,%.14f" % (P.row[i] + offsetY,
                                                       P.col[i] + offsetX,
                                                       P.data[i])
                                      for i in range(start, end)]), "")
                    start = end

    # find the best way to partition matrix into blocks
    blocksPerRow, blocksPerCol = _partition(m, n, maxTotalBlocks)
    blockHeight = float(m) / blocksPerCol
    blockWidth = float(n) / blocksPerRow
    totalBlocks = blocksPerRow * blocksPerCol
    #print "%dx%d blocks used with block dimension %fx%f" % (blocksPerCol, blocksPerRow, blockHeight, blockWidth)
    params = Params(blocksPerRow=blocksPerRow, blocksPerCol=blocksPerCol,
                    blockHeight=blockHeight, blockWidth=blockWidth,
                    alpha=alpha, beta=beta, transA=transA, transB=transB,
                    m=m, k=k, n=n)
    params.elemsPerLine = 1000
    # map matrix A into row blocks
    params.matrixId = 'A'
    jobMapA = disco.new_job(input=A.urls, name="dgemm_mapA",
                            map_reader=A.mapReader, map=_mapRowBlocks,
                            params=params, nr_reduces=totalBlocks)
    resA = jobMapA.wait(clean=False, poll_interval=2)
    # map matrix B into col blocks
    params.matrixId = 'B'
    jobMapB = disco.new_job(input=B.urls, name="dgemm_mapB",
                            map_reader=B.mapReader, map=_mapColBlocks,
                            params=params, nr_reduces=totalBlocks)
    resB = jobMapB.wait(clean=False, poll_interval=2)
    # map matrix C into blocks
    if len(C.urls) == 0:
        # quick fix for disco bug
        resC = []
    else:
        params.matrixId = 'C'
        jobMapC = disco.new_job(input=C.urls, name="dgemm_mapC",
                                map_reader=C.mapReader, map=_mapBlocks,
                                params=params, nr_reduces=totalBlocks)
        resC = jobMapC.wait(clean=False, poll_interval=2)
    # multiply the blocks
    res = disco.new_job(input=resA + resB + resC, name="dgemm_reduce",
                        map_reader=chain_reader, map=nop_map,
                        nr_reduces=totalBlocks, reduce=_reduceMultiplyAndAdd,
                        params=params).wait(clean=False, poll_interval=2)
    # clean up
    jobMapA.purge()
    jobMapB.purge()
    if len(C.urls) > 0:
        # quick fix for disco bug
        jobMapC.purge()
    return MatrixWrapper(res, chain_reader)