def first_mapper(data):
    """Discard the input and emit rows of random matrices sharing one R factor.

    Every incoming record is read and thrown away.  The mapper then builds a
    single n-by-n upper-triangular matrix R (all ones on and above the
    diagonal, divided by sqrt(m/n)) and, k times over, draws an n-by-n Q
    factor from the QR factorization of a Gaussian random matrix and yields
    the n rows of Q.dot(R).

    Sizes come from outside the function:
      n -- gopts.getintkey('ncols')
      m -- the 'nrows' environment variable
      k -- the 'maprows' environment variable floor-divided by n, so any
           remainder of maprows that does not fill a whole n-row block is
           not generated.

    Yields (key, row) pairs where key is a random integer in
    [0, 4000000000] and row is util.array2list applied to one matrix row.
    """
    # NOTE(review): first_mapper is defined a second time further down in
    # this file; that later definition rebinds the name.
    sys.stderr.write("%s is a mapper\n" % os.uname()[1])

    # suck up all the data so Hadoop doesn't complain
    for _key, _val in data:
        pass

    n = gopts.getintkey('ncols')
    m = int(os.getenv('nrows'))
    # Explicit floor division: k is a loop count handed to xrange() and to
    # '%i' formatting, so it must stay an int even if true division is in
    # effect for this module.
    k = int(os.getenv('maprows')) // n
    s = float(m) / float(n)
    setstatus(
        "generating %i-by-%i R matrix with scale factor %i/%i=%s" % (
            n, n, m, n, s))

    # R is built once and reused for every block.
    R = numpy.triu(numpy.ones((n, n))) / math.sqrt(s)

    for i in xrange(k):
        step = i + 1
        setstatus(
            'step %i/%i: generating local %i-by-%i Q matrix' % (step, k, n, n))

        # qr() returns (Q, R); only the Q factor is wanted here.
        Q = numpy.linalg.qr(numpy.random.randn(n, n))[0]
        setstatus('step %i/%i: multiplying local matrix' % (step, k))
        A = Q.dot(R)
        setstatus('step %i/%i: outputting %i rows' % (step, k, A.shape[0]))
        for row in A:
            # presumably the random key spreads rows across reducers --
            # TODO confirm against the job configuration
            key = random.randint(0, 4000000000)
            yield key, util.array2list(row)
def first_mapper(data):
    """Discard the input and emit rows of random matrices sharing one R factor.

    Every incoming record is read and thrown away.  The mapper then builds a
    single n-by-n upper-triangular matrix R (all ones on and above the
    diagonal, divided by sqrt(m/n)) and, k times over, draws an n-by-n Q
    factor from the QR factorization of a Gaussian random matrix and yields
    the n rows of Q.dot(R).

    Sizes come from outside the function:
      n -- gopts.getintkey('ncols')
      m -- the 'nrows' environment variable
      k -- the 'maprows' environment variable floor-divided by n, so any
           remainder of maprows that does not fill a whole n-row block is
           not generated.

    Yields (key, row) pairs where key is a random integer in
    [0, 4000000000] and row is util.array2list applied to one matrix row.
    """
    # NOTE(review): this re-defines first_mapper, which also appears earlier
    # in this file; the later definition is the one left bound to the name.
    sys.stderr.write("%s is a mapper\n" % os.uname()[1])

    # suck up all the data so Hadoop doesn't complain
    for _key, _val in data:
        pass

    n = gopts.getintkey('ncols')
    m = int(os.getenv('nrows'))
    # Explicit floor division: k is a loop count handed to xrange() and to
    # '%i' formatting, so it must stay an int even if true division is in
    # effect for this module.
    k = int(os.getenv('maprows')) // n
    s = float(m) / float(n)
    setstatus("generating %i-by-%i R matrix with scale factor %i/%i=%s" %
              (n, n, m, n, s))

    # R is built once and reused for every block.
    R = numpy.triu(numpy.ones((n, n))) / math.sqrt(s)

    for i in xrange(k):
        step = i + 1
        setstatus('step %i/%i: generating local %i-by-%i Q matrix' %
                  (step, k, n, n))

        # qr() returns (Q, R); only the Q factor is wanted here.
        Q = numpy.linalg.qr(numpy.random.randn(n, n))[0]
        setstatus('step %i/%i: multiplying local matrix' % (step, k))
        A = Q.dot(R)
        setstatus('step %i/%i: outputting %i rows' % (step, k, A.shape[0]))
        for row in A:
            # presumably the random key spreads rows across reducers --
            # TODO confirm against the job configuration
            key = random.randint(0, 4000000000)
            yield key, util.array2list(row)
def localQoutput(rows):
    """Left-multiply the given rows by a random Q factor and yield the result.

    rows is converted to a numpy array A with localm rows.  A
    localm-by-localm Q is taken from the QR factorization of a Gaussian
    random matrix, and each row of Q.dot(A) is yielded after passing
    through util.array2list.

    An empty rows yields nothing.
    """
    # NOTE(review): localQoutput is defined a second time further down in
    # this file; that later definition rebinds the name.
    setstatus('converting to numpy array')
    A = numpy.array(rows)
    localm = A.shape[0]

    if localm == 0:
        # numpy.array([]) has shape (0,), so the A.shape[1] lookup below
        # would raise IndexError; there is nothing to output anyway.
        return

    setstatus('generating local Q of size %i-by-%i' % (localm, localm))
    # qr() returns (Q, R); only the Q factor is wanted here.
    Q = numpy.linalg.qr(numpy.random.randn(localm, localm))[0]
    setstatus(
        'multiplying %i-by-%i A by %i-by-%i Q' % (
            localm, A.shape[1], localm, localm))
    A = Q.dot(A)

    setstatus('outputting')
    for row in A:
        yield util.array2list(row)
def localQoutput(rows):
    """Left-multiply the given rows by a random Q factor and yield the result.

    rows is converted to a numpy array A with localm rows.  A
    localm-by-localm Q is taken from the QR factorization of a Gaussian
    random matrix, and each row of Q.dot(A) is yielded after passing
    through util.array2list.

    An empty rows yields nothing.
    """
    # NOTE(review): this re-defines localQoutput, which also appears earlier
    # in this file; the later definition is the one left bound to the name.
    setstatus('converting to numpy array')
    A = numpy.array(rows)
    localm = A.shape[0]

    if localm == 0:
        # numpy.array([]) has shape (0,), so the A.shape[1] lookup below
        # would raise IndexError; there is nothing to output anyway.
        return

    setstatus('generating local Q of size %i-by-%i' % (localm, localm))
    # qr() returns (Q, R); only the Q factor is wanted here.
    Q = numpy.linalg.qr(numpy.random.randn(localm, localm))[0]
    setstatus('multiplying %i-by-%i A by %i-by-%i Q' %
              (localm, A.shape[1], localm, localm))
    A = Q.dot(A)

    setstatus('outputting')
    for row in A:
        yield util.array2list(row)