def first_mapper(data):
    """Drain the input and emit rows of random matrices sharing one R factor.

    The incoming records are consumed and discarded. The mapper then builds
    an n-by-n upper-triangular matrix of ones, scaled by 1/sqrt(m/n), and
    for each step multiplies it by a fresh random Q factor (the Q of a QR
    factorization of an n-by-n Gaussian matrix), yielding the rows of Q.dot(R).

    Configuration read by this function:
        ncols   -- via ``gopts.getintkey('ncols')``; the matrix width n.
        nrows   -- environment variable; the value m in the scale factor m/n.
        maprows -- environment variable; ``maprows // n`` steps are run,
                   each producing n rows.

    Yields:
        ``(key, row)`` pairs, where ``key`` is a random integer in
        [0, 4000000000] and ``row`` is ``util.array2list`` of one output row.
    """
    hostname = os.uname()[1]
    # Writes the same bytes as the Python 2 ``print >>sys.stderr`` statement,
    # but is also valid on Python 3.
    sys.stderr.write("%s is a mapper\n" % (hostname,))

    # suck up all the data so Hadoop doesn't complain
    for _key, _val in data:
        pass

    n = gopts.getintkey('ncols')
    m = int(os.getenv('nrows'))
    # Explicit floor division: the step count must be an integer on both
    # Python 2 and Python 3 (plain ``/`` yields a float on Python 3).
    k = int(os.getenv('maprows')) // n
    s = float(m) / float(n)

    setstatus(
        "generating %i-by-%i R matrix with scale factor %i/%i=%s" % (
            n, n, m, n, s))
    R = numpy.triu(numpy.ones((n, n))) / math.sqrt(s)

    for i in range(k):
        setstatus(
            'step %i/%i: generating local %i-by-%i Q matrix' % (i + 1, k, n, n))
        Q = numpy.linalg.qr(numpy.random.randn(n, n))[0]  # just the Q factor

        setstatus('step %i/%i: multiplying local matrix' % (i + 1, k))
        A = Q.dot(R)

        setstatus('step %i/%i: outputting %i rows' % (i + 1, k, A.shape[0]))
        for row in A:
            # Random key per row -- presumably so rows are spread evenly
            # over the downstream tasks; confirm against the job setup.
            key = random.randint(0, 4000000000)
            yield key, util.array2list(row)
def first_mapper(data):
    """Drain the input and emit rows of random matrices sharing one R factor.

    The incoming records are consumed and discarded. The mapper then builds
    an n-by-n upper-triangular matrix of ones, scaled by 1/sqrt(m/n), and
    for each step multiplies it by a fresh random Q factor (the Q of a QR
    factorization of an n-by-n Gaussian matrix), yielding the rows of Q.dot(R).

    Configuration read by this function:
        ncols   -- via ``gopts.getintkey('ncols')``; the matrix width n.
        nrows   -- environment variable; the value m in the scale factor m/n.
        maprows -- environment variable; ``maprows // n`` steps are run,
                   each producing n rows.

    Yields:
        ``(key, row)`` pairs, where ``key`` is a random integer in
        [0, 4000000000] and ``row`` is ``util.array2list`` of one output row.
    """
    hostname = os.uname()[1]
    # Writes the same bytes as the Python 2 ``print >>sys.stderr`` statement,
    # but is also valid on Python 3.
    sys.stderr.write("%s is a mapper\n" % (hostname,))

    # suck up all the data so Hadoop doesn't complain
    for _key, _val in data:
        pass

    n = gopts.getintkey('ncols')
    m = int(os.getenv('nrows'))
    # Explicit floor division: the step count must be an integer on both
    # Python 2 and Python 3 (plain ``/`` yields a float on Python 3).
    k = int(os.getenv('maprows')) // n
    s = float(m) / float(n)

    setstatus(
        "generating %i-by-%i R matrix with scale factor %i/%i=%s" % (
            n, n, m, n, s))
    R = numpy.triu(numpy.ones((n, n))) / math.sqrt(s)

    for i in range(k):
        setstatus(
            'step %i/%i: generating local %i-by-%i Q matrix' % (i + 1, k, n, n))
        Q = numpy.linalg.qr(numpy.random.randn(n, n))[0]  # just the Q factor

        setstatus('step %i/%i: multiplying local matrix' % (i + 1, k))
        A = Q.dot(R)

        setstatus('step %i/%i: outputting %i rows' % (i + 1, k, A.shape[0]))
        for row in A:
            # Random key per row -- presumably so rows are spread evenly
            # over the downstream tasks; confirm against the job setup.
            key = random.randint(0, 4000000000)
            yield key, util.array2list(row)
def localQoutput(rows):
    """Mix the given rows with a random Q factor and yield the result.

    ``rows`` is converted to a numpy array A with ``localm`` rows. A random
    localm-by-localm Q (the Q factor of a QR factorization of a Gaussian
    matrix) is generated and the rows of ``Q.dot(A)`` are yielded one at a
    time, each converted with ``util.array2list``.

    Assumes ``rows`` is a sequence of equal-length numeric rows (so that the
    array is two-dimensional) -- TODO confirm against the caller.

    An empty ``rows`` yields nothing.
    """
    setstatus('converting to numpy array')
    A = numpy.array(rows)
    localm = A.shape[0]

    # An empty batch produces an array of shape (0,), so the A.shape[1]
    # lookup below would raise IndexError; there is nothing to output anyway.
    if localm == 0:
        return

    setstatus('generating local Q of size %i-by-%i' % (localm, localm))
    Q = numpy.linalg.qr(numpy.random.randn(localm, localm))[0]  # just the Q factor

    setstatus(
        'multiplying %i-by-%i A by %i-by-%i Q' % (
            localm, A.shape[1], localm, localm))
    A = Q.dot(A)

    setstatus('outputting')
    for row in A:
        yield util.array2list(row)
def localQoutput(rows):
    """Mix the given rows with a random Q factor and yield the result.

    ``rows`` is converted to a numpy array A with ``localm`` rows. A random
    localm-by-localm Q (the Q factor of a QR factorization of a Gaussian
    matrix) is generated and the rows of ``Q.dot(A)`` are yielded one at a
    time, each converted with ``util.array2list``.

    Assumes ``rows`` is a sequence of equal-length numeric rows (so that the
    array is two-dimensional) -- TODO confirm against the caller.

    An empty ``rows`` yields nothing.
    """
    setstatus('converting to numpy array')
    A = numpy.array(rows)
    localm = A.shape[0]

    # An empty batch produces an array of shape (0,), so the A.shape[1]
    # lookup below would raise IndexError; there is nothing to output anyway.
    if localm == 0:
        return

    setstatus('generating local Q of size %i-by-%i' % (localm, localm))
    Q = numpy.linalg.qr(numpy.random.randn(localm, localm))[0]  # just the Q factor

    setstatus(
        'multiplying %i-by-%i A by %i-by-%i Q' % (
            localm, A.shape[1], localm, localm))
    A = Q.dot(A)

    setstatus('outputting')
    for row in A:
        yield util.array2list(row)