# Example #1
# 0
# File: blas.py  Project: davin/disco
def dgemm(disco, transA, transB, m, n, k, alpha, A, B, beta, C, maxTotalBlocks=128):
	"""
	Compute the general matrix product alpha*op(A)*op(B) + beta*C in double precision, where op(X) = X or transpose(X), as a series of Disco map/reduce jobs over sparse matrix blocks.
	@param disco A Disco instance used to submit the map/reduce jobs.
	@param transA A boolean value for transposing matrix A or not.
	@param transB A boolean value for transposing matrix B or not.
	@param m Number of rows of matrix op(A) and C.
	@param n Number of columns of matrix op(B) and C.
	@param k Number of columns of matrix op(A) and rows of matrix op(B).
	@param alpha Scalar multiplier for the matrix product op(A)*op(B); applied in the map phase for A.
	@param A MatrixWrapper object encapsulating matrix A.
	@param B MatrixWrapper object encapsulating matrix B.
	@param beta Scalar multiplier for matrix C; applied in the map phase for C.
	@param C MatrixWrapper object encapsulating matrix C. If there is no C term, then pass in an empty wrapper, MatrixWrapper(), as placeholder.
	@param maxTotalBlocks Suggested number of matrix blocks to use for carrying out the multiplication. Ideally, this should equal the number of cores available in the cluster. The actual number of blocks is selected by _partition based on the size of the matrix.
	@return MatrixWrapper object encapsulating the resulting matrix.
	"""
	def _mapRowBlocks(e, params):
		from math import ceil
		from numpy import float64
		if type(e) == tuple:
			e = e[0]
		output = []
		elems = e.split(";")
		for elem in elems:
			i, j, val = map(float64, elem.split(","))
			if params.transA:
				i, j = j, i
			assert i < params.m, "row index %d exceeds matrix dimensions" % int(i)
			assert j < params.k, "col index %d exceeds matrix dimensions" % int(j)
			blockX = int(j / params.blockWidth)
			blockY = int(i / params.blockHeight)
			offsetY = ceil(params.blockHeight * blockY)
			val = params.alpha * val
			if val != 0.0:
				output += [(blockY*params.blocksPerRow+x, "%s,%d,%d,%.14f" % (params.matrixId, int(i-offsetY), int(j), val)) for x in range(0, params.blocksPerRow)]
		return output
		
	def _mapColBlocks(e, params):
		from math import ceil
		from numpy import float64
		if type(e) == tuple:
			e = e[0]
		output = []
		elems = e.split(";")
		for elem in elems:
			i, j, val = map(float64, elem.split(","))
			if params.transB:
				i, j = j, i
			assert i < params.k, "row index %d exceeds matrix dimensions" % int(i)
			assert j < params.n, "col index %d exceeds matrix dimensions" % int(j)
			blockX = int(j / params.blockWidth)
			blockX = int(j / params.blockWidth)
			offsetX = ceil(params.blockWidth * blockX)
			if val != 0.0:
				output += [(y*params.blocksPerRow+blockX, "%s,%d,%d,%.14f" % (params.matrixId, int(i), int(j-offsetX), val)) for y in range(0, params.blocksPerCol)]
		return output
		
	def _mapBlocks(e, params):
		from math import ceil
		from numpy import float64
		if type(e) == tuple:
			e = e[0]
		output = []
		elems = e.split(";")
		for elem in elems:
			i, j, val = map(float64, elem.split(","))
			assert i < params.m, "row index %d exceeds matrix dimensions" % int(i)
			assert j < params.n, "col index %d exceeds matrix dimensions" % int(j)
			blockX = int(j / params.blockWidth)
			blockX = int(j / params.blockWidth)
			blockY = int(i / params.blockHeight)
			offsetX = ceil(params.blockWidth * blockX)
			offsetY = ceil(params.blockHeight * blockY)
			val = params.beta*val
			if val != 0.0:
				output += [(blockY*params.blocksPerRow+blockX, "%s,%d,%d,%.14f" % (params.matrixId, int(i-offsetY), int(j-offsetX), val))]
		return output

	def nop_map(e, params):
		# Identity mapper: pass each record straight through to the reduce
		# phase unchanged; used when the real work happens in the reducer.
		passthrough = [e]
		return passthrough

	def _reduceMultiplyAndAdd(iter, out, params):
		"""
		Reduce phase: for each block id, assemble sparse COO blocks of A, B
		and C from the tagged "matrixId,row,col,val" strings and emit
		A*B + C per block (alpha and beta were already folded in by the map
		phases), translating block-local indices back into coordinates of
		the full result matrix.
		NOTE(review): Python 2 code — relies on dict.has_key and integer
		'/' division below; the 'iter' parameter shadows the builtin.
		"""
		from numpy import float64
		# per-block, per-matrix-tag ('A'/'B'/'C') COO triplet accumulators
		rows = {}
		cols = {}
		vals = {}
		# largest column/row index seen per block+tag, used to size the
		# sparse blocks below
		maxColIdx = {}
		maxRowIdx = {}
		for blockId, s in iter:
			blockId = int(blockId)
			# each value is "matrixId,rowIdx,colIdx,val" as written by the mappers
			matrixId, rowIdx, colIdx, val = s.split(",")
			rowIdx = int(rowIdx)
			colIdx = int(colIdx)
			val = float64(val)
			if not rows.has_key(blockId):
				rows[blockId] = {}
				cols[blockId] = {}
				vals[blockId] = {}
				maxColIdx[blockId] = {}
				maxRowIdx[blockId] = {}
			if not rows[blockId].has_key(matrixId):
				rows[blockId][matrixId] = []
				cols[blockId][matrixId] = []
				vals[blockId][matrixId] = []
				maxColIdx[blockId][matrixId] = 0
				maxRowIdx[blockId][matrixId] = 0
			rows[blockId][matrixId].append(rowIdx)
			cols[blockId][matrixId].append(colIdx)
			vals[blockId][matrixId].append(val)
			maxColIdx[blockId][matrixId] = max(maxColIdx[blockId][matrixId], cols[blockId][matrixId][-1])
			maxRowIdx[blockId][matrixId] = max(maxRowIdx[blockId][matrixId], rows[blockId][matrixId][-1])
		# initialize sparse matrices
		from math import ceil
		from scipy.sparse import coo_matrix
		# NOTE(review): the 'dims' keyword below is the legacy scipy.sparse
		# API (later renamed 'shape') — confirm the scipy version this
		# project targets before touching these calls.
		for blockId in rows.keys():
			# compute the index offset in the original matrix
			# (block ids are row-major: blockId = blockY*blocksPerRow + blockX)
			blockY = blockId / params.blocksPerRow
			blockX = blockId % params.blocksPerRow
			offsetY = ceil(params.blockHeight * blockY)
			offsetX = ceil(params.blockWidth * blockX)
			# compute matrix product
			if not vals[blockId].has_key('A') or not vals[blockId].has_key('B'):
				# skip multiplication since either block A or B is empty
				if vals[blockId].has_key('C'):
					# return beta*C
					P = coo_matrix((vals[blockId]['C'],(rows[blockId]['C'],cols[blockId]['C'])), dtype=float64, dims=(maxRowIdx[blockId]['C']+1, maxColIdx[blockId]['C']+1))
				else:
					P = None
			else:
				if vals[blockId].has_key('C'):
					# size the block so it holds both the product and C
					m = max(maxRowIdx[blockId]['A'], maxRowIdx[blockId]['C']) + 1
					n = max(maxColIdx[blockId]['B'], maxColIdx[blockId]['C']) + 1
					C = coo_matrix((vals[blockId]['C'],(rows[blockId]['C'],cols[blockId]['C'])), dtype=float64, dims=(m,n))
				else:
					m = maxRowIdx[blockId]['A'] + 1
					n = maxColIdx[blockId]['B'] + 1
					# no C contribution: use an empty block so P = A*B + 0
					C = coo_matrix(([],([],[])), dtype=float64, dims=(m,n))
				# the shared inner dimension must agree between A and B, so
				# take the largest index observed in either
				A = coo_matrix((vals[blockId]['A'],(rows[blockId]['A'],cols[blockId]['A'])), dtype=float64, dims=(m,max(maxColIdx[blockId]['A'], maxRowIdx[blockId]['B'])+1))
				B = coo_matrix((vals[blockId]['B'],(rows[blockId]['B'],cols[blockId]['B'])), dtype=float64, dims=(max(maxColIdx[blockId]['A'], maxRowIdx[blockId]['B'])+1, n))
				P = (A * B + C).tocoo()
			# map block indices into original indices
			# NOTE(review): 'P != None' would be elementwise on modern sparse
			# matrices — fine only under the legacy scipy assumed above.
			if P != None:
				# emit at most elemsPerLine "row,col,val" triplets per line,
				# ';'-joined — the same format the mappers consume
				start = 0
				while start < len(P.row):
					end = min(start+params.elemsPerLine, len(P.row))
					out.add(";".join(["%d,%d,%.14f" % (P.row[i]+offsetY, P.col[i]+offsetX, P.data[i]) for i in range(start,end)]), "")
					start = end

	# find the best way to partition matrix into blocks
	blocksPerRow, blocksPerCol = _partition(m, n, maxTotalBlocks)
	# fractional block dimensions: element (i,j) belongs to block
	# (int(i/blockHeight), int(j/blockWidth)) in the mappers above
	blockHeight = float(m) / blocksPerCol
	blockWidth = float(n) / blocksPerRow
	totalBlocks = blocksPerRow * blocksPerCol
	#print "%dx%d blocks used with block dimension %fx%f" % (blocksPerCol, blocksPerRow, blockHeight, blockWidth)
	params = Params(blocksPerRow=blocksPerRow, blocksPerCol=blocksPerCol, blockHeight=blockHeight, blockWidth=blockWidth, alpha=alpha, beta=beta, transA=transA, transB=transB, m=m, k=k, n=n)
	# max number of "row,col,val" triplets the reducer packs into one line
	params.elemsPerLine = 1000
	# NOTE: params.matrixId is mutated before each submission below, so each
	# job must be created immediately after its matrixId is set — do not
	# reorder these statements.
	# map matrix A into row blocks
	params.matrixId = 'A'
	jobMapA = disco.new_job(input=A.urls, name="dgemm_mapA", map_reader=A.mapReader, map=_mapRowBlocks, params=params, nr_reduces=totalBlocks)
	resA = jobMapA.wait(clean=False, poll_interval=2)
	# map matrix B into col blocks
	params.matrixId = 'B'
	jobMapB = disco.new_job(input=B.urls, name="dgemm_mapB", map_reader=B.mapReader, map=_mapColBlocks, params=params, nr_reduces=totalBlocks)
	resB = jobMapB.wait(clean=False, poll_interval=2)
	# map matrix C into blocks
	if len(C.urls) == 0: # quick fix for disco bug
		# no C term was supplied: contribute nothing to the reduce input
		resC = []
	else:
		params.matrixId = 'C'
		jobMapC = disco.new_job(input=C.urls, name="dgemm_mapC", map_reader=C.mapReader, map=_mapBlocks, params=params, nr_reduces=totalBlocks)
		resC = jobMapC.wait(clean=False, poll_interval=2)
	# multiply the blocks: nop_map forwards the shards, the reducer does A*B+C
	res = disco.new_job(input=resA+resB+resC, name="dgemm_reduce", map_reader=chain_reader, map=nop_map, nr_reduces=totalBlocks, reduce=_reduceMultiplyAndAdd, params=params).wait(clean=False, poll_interval=2)
	# clean up
	jobMapA.purge()
	jobMapB.purge()
	if len(C.urls) > 0: # quick fix for disco bug
		jobMapC.purge()
	return MatrixWrapper(res, chain_reader)