def dgema(disco, transA, transB, m, n, alpha, A, B, beta, maxTotalBlocks=128):
    """
    Compute the general matrix addition alpha*op(A) + beta*op(B) in double
    precision, where op(X) = X or transpose(X).

    @param disco A Disco instance.
    @param transA A boolean value for transposing matrix A or not.
    @param transB A boolean value for transposing matrix B or not.
    @param m Number of rows of matrix op(A).
    @param n Number of columns of matrix op(B).
    @param alpha Scalar multiplier for matrix A.
    @param A MatrixWrapper object encapsulating matrix A.
    @param B MatrixWrapper object encapsulating matrix B.
    @param beta Scalar multiplier for matrix B.
    @param maxTotalBlocks Suggested number of matrix blocks to use for
           carrying out the addition. Ideally, this should equal the number
           of cores available in the cluster. The actual number of blocks is
           selected based on the size of the matrix.
    @return MatrixWrapper object encapsulating the resulting matrix.
    """
    def _mapBlocks(e, params):
        from math import ceil
        from numpy import float64
        if type(e) == tuple:
            e = e[0]
        output = []
        elems = e.split(";")
        for elem in elems:
            i, j, val = map(float64, elem.split(","))
            # transposition amounts to swapping the row and column indices
            if params.transpose:
                i, j = j, i
            assert i < params.m, "row index %d exceeds matrix dimensions" % int(i)
            assert j < params.n, "col index %d exceeds matrix dimensions" % int(j)
            # locate the block this element falls into, and the block's
            # offset within the original matrix
            blockX = int(j / params.blockWidth)
            blockY = int(i / params.blockHeight)
            offsetX = ceil(params.blockWidth * blockX)
            offsetY = ceil(params.blockHeight * blockY)
            # scale the element and emit it keyed by its linear block id
            val = params.scaling * val
            if val != 0.0:
                output += [(blockY * params.blocksPerRow + blockX,
                            "%d,%d,%.14f" % (int(i - offsetY), int(j - offsetX), val))]
        return output

    def nop_map(e, params):
        return [e]

    def _reduceAddBlocks(iter, out, params):
        from math import ceil
        from numpy import float64
        s = {}
        # add matrices: accumulate elements per block, row, and column
        for blockId, t in iter:
            blockId = int(blockId)
            rowIdx, colIdx, val = t.split(",")
            rowIdx = int(rowIdx)
            colIdx = int(colIdx)
            if blockId not in s:
                s[blockId] = {}
            if rowIdx not in s[blockId]:
                s[blockId][rowIdx] = {}
            s[blockId][rowIdx][colIdx] = \
                s[blockId][rowIdx].get(colIdx, 0) + float64(val)
        # output results
        for blockId in s.keys():
            # compute the index offset in the original matrix
            offsetY = ceil(params.blockHeight * (blockId // params.blocksPerRow))
            offsetX = ceil(params.blockWidth * (blockId % params.blocksPerRow))
            # map block indices back into original indices
            for rowIdx in s[blockId].keys():
                for colIdx in s[blockId][rowIdx].keys():
                    out.add("%d,%d,%.14f" % (rowIdx + offsetY, colIdx + offsetX,
                                             s[blockId][rowIdx][colIdx]), "")

    # find the best way to partition the matrix into blocks
    blocksPerRow, blocksPerCol = _partition(m, n, maxTotalBlocks)
    blockHeight = float(m) / blocksPerCol
    blockWidth = float(n) / blocksPerRow
    totalBlocks = blocksPerRow * blocksPerCol

    # map and scale matrices
    params = Params(blocksPerRow=blocksPerRow, blocksPerCol=blocksPerCol,
                    blockHeight=blockHeight, blockWidth=blockWidth)
    params.transpose = transA
    params.scaling = alpha
    params.m = m
    params.n = n
    jobMapA = disco.new_job(input=A.urls, name="dgema_mapA",
                            map_reader=A.mapReader, map=_mapBlocks,
                            params=params, nr_reduces=totalBlocks)
    resA = jobMapA.wait(clean=False, poll_interval=2)

    params.transpose = transB
    params.scaling = beta
    jobMapB = disco.new_job(input=B.urls, name="dgema_mapB",
                            map_reader=B.mapReader, map=_mapBlocks,
                            params=params, nr_reduces=totalBlocks)
    resB = jobMapB.wait(clean=False, poll_interval=2)

    # add matrices: the scaled blocks of A and B share block ids, so the
    # reduce step sums them elementwise
    res = disco.new_job(input=resA + resB, name="dgema_reduce",
                        map_reader=chain_reader, map=nop_map, params=params,
                        reduce=_reduceAddBlocks,
                        nr_reduces=totalBlocks).wait(clean=False, poll_interval=2)

    # clean up
    jobMapA.purge()
    jobMapB.purge()
    return MatrixWrapper(res, chain_reader)
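
# Hedged sketch (an assumption; _partition is defined elsewhere in the module
# and may differ): one plausible partitioning strategy is to pick a grid of at
# most maxTotalBlocks blocks whose shape roughly follows the matrix aspect
# ratio, so that individual blocks stay close to square.
def _partition_sketch(m, n, maxTotalBlocks):
    from math import sqrt
    # rows of blocks ~ sqrt(maxTotalBlocks * m/n), clamped to valid ranges
    blocksPerCol = max(1, min(m, maxTotalBlocks,
                              int(sqrt(maxTotalBlocks * float(m) / n))))
    blocksPerRow = max(1, min(n, maxTotalBlocks // blocksPerCol))
    return blocksPerRow, blocksPerCol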
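
# Hedged usage sketch (an assumption, not from the original module): how dgema
# might be invoked. The master URL and the input url lists below are
# placeholders; MatrixWrapper is presumed to expose the `urls` and `mapReader`
# attributes used above.
#
#   from disco.core import Disco
#   disco = Disco("disco://localhost")
#   A = MatrixWrapper(a_urls, chain_reader)   # m-by-n sparse matrix A
#   B = MatrixWrapper(b_urls, chain_reader)   # m-by-n sparse matrix B
#   # C = 2.0*A - B, with no transposition:
#   C = dgema(disco, False, False, m, n, 2.0, A, B, -1.0)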