Beispiel #1
0
def SolveWeighted(A, B, C, D, lambd):
    """Solve the stacked system [A; lambd*B] X ~= [C; lambd*D] and report residuals.

    B and D are copied and scaled by ``lambd``, stacked below A and C with
    ``El.VCat``, and the stacked problem is handed to ``El.LeastSquares``.
    The relative Frobenius norms of ``C - A X`` and ``D - B X`` are printed.

    Reads the module-level names ``ctrl``, ``display``, ``CNorm`` and
    ``DNorm`` and overwrites the module-level scratch matrix ``E``.
    Returns nothing.
    """
    # Scale copies so the caller's B and D are left untouched.
    BScale = El.SparseMatrix()
    El.Copy(B, BScale)
    El.Scale(lambd, BScale)

    DScale = El.Matrix()
    El.Copy(D, DScale)
    El.Scale(lambd, DScale)

    AEmb = El.VCat(A, BScale)
    CEmb = El.VCat(C, DScale)

    X = El.LeastSquares(AEmb, CEmb, ctrl)

    # E := C - A X
    El.Copy(C, E)
    El.Multiply(El.NORMAL, -1., A, X, 1., E)
    residNorm = El.FrobeniusNorm(E)
    if display:
        El.Display(E, "C - A X")
    # print() with a single formatted string behaves the same on Python 2 and 3.
    print("lambda= {} : || C - A X ||_F / || C ||_F = {}".format(
        lambd, residNorm / CNorm))

    # E := D - B X
    El.Copy(D, E)
    El.Multiply(El.NORMAL, -1., B, X, 1., E)
    equalNorm = El.FrobeniusNorm(E)
    if display:
        El.Display(E, "D - B X")
    print("lambda= {} : || D - B X ||_F / || D ||_F = {}".format(
        lambd, equalNorm / DNorm))
Beispiel #2
0
def SolveWeighted(A, B, D, lambd):
    """Solve [lambd*A, B] [X/lambd; Y] ~= D and report || Y || and the residual.

    A is copied, scaled by ``lambd`` and placed next to B with ``El.HCat``.
    The least-squares solution of the combined system is split into its
    first ``n0*n1`` rows (rescaled by ``lambd`` to give X) and the following
    ``numColsB`` rows (Y).

    Reads the module-level names ``display``, ``output``, ``ctrl``,
    ``baseAlpha``, ``worldRank``, ``n0``, ``n1``, ``numRHS``, ``numColsB``
    and ``DNorm``; sets ``ctrl.alpha`` and overwrites the scratch object
    ``E``.  Returns nothing.
    """
    AScale = El.DistSparseMatrix()
    El.Copy(A, AScale)
    El.Scale(lambd, AScale)
    AEmb = El.HCat(AScale, B)
    if display:
        El.Display(AEmb, "[lambda*A, B]")
    if output:
        El.Print(AEmb, "[lambda*A, B]")

    ctrl.alpha = baseAlpha
    if worldRank == 0:
        # print() with a single formatted string works on Python 2 and 3.
        print("lambda= {} : ctrl.alpha= {}".format(lambd, ctrl.alpha))
    XEmb = El.LeastSquares(AEmb, D, ctrl)

    # Split the stacked solution; the X part was computed against lambd*A,
    # so it is multiplied by lambd before being used with the unscaled A.
    X = XEmb[0:n0 * n1, 0:numRHS]
    Y = XEmb[n0 * n1:n0 * n1 + numColsB, 0:numRHS]
    El.Scale(lambd, X)

    YNorm = El.FrobeniusNorm(Y)
    if worldRank == 0:
        print("lambda= {} : || Y ||_F = {}".format(lambd, YNorm))

    # E := D - A X - B Y
    El.Copy(D, E)
    El.Multiply(El.NORMAL, -1., A, X, 1., E)
    El.Multiply(El.NORMAL, -1., B, Y, 1., E)
    residNorm = El.FrobeniusNorm(E)
    if worldRank == 0:
        print("lambda= {} : || D - A X - B Y ||_F / || D ||_F = {}".format(
            lambd, residNorm / DNorm))
Beispiel #3
0
    def test_approximate_symmetric_svd(self):
        """Compute the SVD of symmetric **A** such that **SVD(A) = V S V^T**"""
        n = 100
        A = El.DistMatrix()
        El.Uniform(A, n, n)
        A = A.Matrix()

        # Make A symmetric by mirroring the strict lower triangle into the
        # upper triangle (the diagonal needs no copy).  range() keeps the
        # test runnable on both Python 2 and Python 3.
        for i in range(A.Height()):
            for j in range(i):
                A.Set(j, i, A.Get(i, j))

        # Using symmetric SVD
        SA = El.Matrix()
        VA = El.Matrix()

        sl_nla.approximate_symmetric_svd(A, SA, VA, k=n)

        # Check result: rebuild (V S) V^H and compare it with A
        VAT = El.Matrix()
        El.Copy(VA, VAT)

        RESULT = El.Matrix()
        El.Zeros(RESULT, n, n)

        El.DiagonalScale(El.RIGHT, El.NORMAL, SA, VAT)
        El.Gemm(El.NORMAL, El.ADJOINT, 1, VAT, VA, 1, RESULT)

        self.assertTrue(utils.equal(A, RESULT))
Beispiel #4
0
    def _read_elemental_dense(self, colDist=El.MC, rowDist=El.MR):
        """Read ``self.dataset`` from the HDF5 file ``self.fpath`` into a DistMatrix.

        Only MPI rank 0 opens the file.  The dataset is traversed in
        rectangular blocks: rank 0 stages each block in a [CIRC, CIRC]
        matrix, which is then copied into the matching view of the result.

        Args:
            colDist: column distribution of the returned matrix.
            rowDist: row distribution of the returned matrix.

        Returns:
            An ``El.DistMatrix`` with the requested distribution holding the
            dataset's entries.
        """
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        # only the root process touches the filesystem
        f = h5py.File(self.fpath, 'r') if rank == 0 else None
        try:
            dataset_obj = f[self.dataset] if rank == 0 else None
            shape = dataset_obj.shape if rank == 0 else None
            shape = comm.bcast(shape, root=0)
            height = shape[0]
            width = shape[1]

            num_entries = height * width
            # max memory capacity per process assumed/hardcoded to 10 blocks
            # XXX should this number be passed as a parameter?
            max_blocks_per_process = 10
            # At least one entry per block: a zero here would make the
            # modulo below divide by zero for small datasets.
            max_block_entries = max(
                1, num_entries // (max_blocks_per_process * size))

            # XXX We could set up a different block generating scheme, e.g.
            # more square-ish blocks
            # Smallest divisor of max_block_entries that is >= its square root.
            block_height = int(numpy.sqrt(max_block_entries))
            while max_block_entries % block_height != 0:
                block_height = block_height + 1
            # Floor division keeps every block quantity an int on Python 3.
            block_width = max_block_entries // block_height
            num_height_blocks = (height + block_height - 1) // block_height
            num_width_blocks = (width + block_width - 1) // block_width
            num_blocks = num_height_blocks * num_width_blocks

            A = El.DistMatrix(colDist=colDist, rowDist=rowDist)
            # The views taken below index into A, so it needs its final size.
            A.Resize(height, width)
            for block in range(num_blocks):
                # the global coordinates of the block corners
                i_start = (block // num_width_blocks) * block_height
                j_start = (block % num_width_blocks) * block_width
                i_end = min(height, i_start + block_height)
                j_end = min(width, j_start + block_width)
                # the block size
                local_height = i_end - i_start
                local_width = j_end - j_start
                # [CIRC, CIRC] matrix is populated by the reader process
                # (i.e. the root)...
                A_block = El.DistMatrix(colDist=El.CIRC, rowDist=El.CIRC)
                A_block.Resize(local_height, local_width)
                if rank == 0:
                    # One HDF5 read per block instead of one per entry.
                    block_data = dataset_obj[i_start:i_end, j_start:j_end]
                    for j in range(local_width):
                        for i in range(local_height):
                            A_block.SetLocal(i, j, block_data[i, j])
                # ... then a view into the full matrix A is constructed...
                A_block_view = A[i_start:i_end, j_start:j_end]
                # ... and finally this view is updated by redistribution of
                # the [CIRC, CIRC] block
                El.Copy(A_block, A_block_view)
        finally:
            # Close the file on the root even if reading failed part-way.
            if f is not None:
                f.close()
        return A
Beispiel #5
0
    def _write_elemental_dense(self, A):
        """Gather the distributed matrix ``A`` at rank 0 and write it out.

        ``A`` is copied into a [CIRC, CIRC] matrix; rank 0 then converts its
        local matrix to a NumPy array and passes it to
        ``self._write_numpy_dense``.  Other ranks only take part in the copy.
        """
        rank = MPI.COMM_WORLD.Get_rank()

        # XXX currently gathers at root
        A_CIRC_CIRC = El.DistMatrix(colDist=El.CIRC, rowDist=El.CIRC)
        El.Copy(A, A_CIRC_CIRC)
        if rank == 0:
            A_numpy_dense = A_CIRC_CIRC.Matrix().ToNumPy()
            self._write_numpy_dense(A_numpy_dense)
Beispiel #6
0
def SolveWeighted(A,B,C,D,lambd):
  """Solve [A; lambd*B] X ~= [C; lambd*D] and print both relative residuals.

  Uses the module-level names ``output``, ``display``, ``ctrl``,
  ``baseAlpha``, ``worldRank``, ``CNorm`` and ``DNorm``; sets ``ctrl.alpha``
  and overwrites the scratch object ``E``.  Returns nothing.
  """
  # lambd-scaled copies of the second block row
  scaledB = El.DistSparseMatrix()
  El.Copy(B, scaledB)
  El.Scale(lambd, scaledB)

  scaledD = El.DistMultiVec()
  El.Copy(D, scaledD)
  El.Scale(lambd, scaledD)

  # Stack the two block rows vertically
  stackedLHS = El.VCat(A, scaledB)
  stackedRHS = El.VCat(C, scaledD)
  if output:
    El.Print(stackedLHS, "AEmb")

  ctrl.alpha = baseAlpha
  if worldRank == 0:
    print('lambda={}, ctrl.alpha={}'.format(lambd, ctrl.alpha))
  X = El.LeastSquares(stackedLHS, stackedRHS, ctrl)

  # E := C - A X
  El.Copy(C, E)
  El.Multiply(El.NORMAL, -1., A, X, 1., E)
  residNorm = El.FrobeniusNorm(E)
  if display:
    El.Display(E, "C - A X")
  if output:
    El.Print(E, "C - A X")
  if worldRank == 0:
    relResid = residNorm/CNorm
    print('lambda={}: || C - A X ||_F / || C ||_F = {}'.format(lambd, relResid))

  # E := D - B X
  El.Copy(D, E)
  El.Multiply(El.NORMAL, -1., B, X, 1., E)
  equalNorm = El.FrobeniusNorm(E)
  if display:
    El.Display(E, "D - B X")
  if output:
    El.Print(E, "D - B X")
  if worldRank == 0:
    relEqual = equalNorm/DNorm
    print('lambda={}: || D - B X ||_F / || D ||_F = {}'.format(lambd, relEqual))
Beispiel #7
0
        y = s / xSize
        A.QueueLocalUpdate(sLoc, s, 2 * (hxInvSq + hyInvSq))
        if x != 0: A.QueueLocalUpdate(sLoc, s - 1, -hxInvSq)
        if x != xSize - 1: A.QueueLocalUpdate(sLoc, s + 1, -hxInvSq)
        if y != 0: A.QueueLocalUpdate(sLoc, s - xSize, -hyInvSq)
        if y != ySize - 1: A.QueueLocalUpdate(sLoc, s + xSize, -hyInvSq)

    A.ProcessQueues()
    return A


# Build the Laplacian, draw a uniform right-hand side, and solve A x = y in
# place (x starts as a copy source for y and is overwritten by the solve).
A = Laplacian(n0, n1)
x = El.DistMultiVec()
y = El.DistMultiVec()
El.Uniform(x, n0 * n1, 1)
El.Copy(x, y)

yNrm = El.Nrm2(y)
if worldRank == 0:
    # print() with one formatted string works on both Python 2 and 3.
    print("|| y ||_2 = {}".format(yNrm))

El.Display(A, "Laplacian")
El.Display(A.DistGraph(), "Laplacian graph")
El.Display(y, "y")

El.SymmetricSolve(A, x)
El.Display(x, "x")

xNrm = El.Nrm2(x)
if worldRank == 0:
    print("|| x ||_2 = {}".format(xNrm))
Beispiel #8
0
import El

# Problem size and whether to print every matrix at the end
n = 1000
output = True

# Complex matrix (zTag) filled by El.Uniform with the parameters (0., 10.)
A = El.Matrix(El.zTag)
El.Uniform(A, n, n, 0., 10.)

# Schur overwrites its argument, so run it on a copy of A
T = El.Matrix(El.zTag)
El.Copy(A, T)
w, Q = El.Schur(T, fullTriangle=True, vectors=True)

X = El.TriangEig(T)

if output:
    for _mat, _label in ((A, "A"), (w, "w"), (Q, "Q"), (T, "T"), (X, "X")):
        El.Print(_mat, _label)
Beispiel #9
0
# Solve the problem with El.LSE and report the relative Frobenius norms of
# the two residuals C - A X and D - B X.
ctrl = El.LeastSquaresCtrl_d()
ctrl.progress = True
ctrl.solveCtrl.relTol = 1e-10
ctrl.solveCtrl.relTolRefine = 1e-12
ctrl.solveCtrl.progress = True

startLSE = El.mpi.Time()
X = El.LSE(A, B, C, D, ctrl)
endLSE = El.mpi.Time()
# print() with one formatted string works on both Python 2 and 3.
print("LSE time: {} seconds".format(endLSE - startLSE))
if display:
    El.Display(X, "X")

# Scratch matrix reused for both residuals
E = El.Matrix()

# E := C - A X
El.Copy(C, E)
El.Multiply(El.NORMAL, -1., A, X, 1., E)
residNorm = El.FrobeniusNorm(E)
if display:
    El.Display(E, "C - A X")
print("|| C - A X ||_F / || C ||_F = {}".format(residNorm / CNorm))

# E := D - B X
El.Copy(D, E)
El.Multiply(El.NORMAL, -1., B, X, 1., E)
equalNorm = El.FrobeniusNorm(E)
if display:
    El.Display(E, "D - B X")
print("|| D - B X ||_F / || D ||_F = {}".format(equalNorm / DNorm))


# Now try solving a weighted least squares problem
Beispiel #10
0
# Sweep lambda linearly from startLambda to endLambda, run El.TV for each
# value, and report || D x ||_1 and || x - b ||_2.
# With a single lambda the original divisor (numLambdas - 1) is zero; clamp
# it to 1 so that the only value used is startLambda.
lambdaSteps = max(numLambdas - 1., 1.)
# range() instead of xrange() keeps the loop valid on Python 3.
for j in range(numLambdas):
    lambd = startLambda + j * (endLambda - startLambda) / lambdaSteps
    if worldRank == 0:
        print('lambda = {}'.format(lambd))

    startTV = El.mpi.Time()
    x = El.TV(b, lambd, ctrl)
    endTV = El.mpi.Time()
    if worldRank == 0:
        print('TV time: {}'.format(endTV - startTV))

    # Dx := D x
    Dx = El.DistMultiVec()
    El.Zeros(Dx, n - 1, 1)
    El.Multiply(El.NORMAL, 1., D, x, 0., Dx)
    if display:
        El.Display(x, "x")
        El.Display(Dx, "Dx")

    DxOneNorm = El.EntrywiseNorm(Dx, 1)
    # e := b - x
    e = El.DistMultiVec()
    El.Copy(b, e)
    El.Axpy(-1., x, e)
    if display:
        El.Display(e, "e")
    eTwoNorm = El.Nrm2(e)
    if worldRank == 0:
        print('|| D x ||_1   = {}'.format(DxOneNorm))
        print('|| x - b ||_2 = {}'.format(eTwoNorm))

El.Finalize()
Beispiel #11
0
# Run El.LAV, report the residual norms of its solution, then time a plain
# least-squares solve of the same system.
ctrl.mehrotraCtrl.qsdCtrl.progress = True
ctrl.mehrotraCtrl.progress = True
ctrl.mehrotraCtrl.outerEquil = True
ctrl.mehrotraCtrl.time = True
# time.clock() was removed in Python 3.8; time.time() gives wall-clock
# seconds on every supported interpreter.
startLAV = time.time()
x = El.LAV(A, b, ctrl)
endLAV = time.time()
if worldRank == 0:
    # print() with one formatted string works on both Python 2 and 3.
    print("LAV time: {} seconds".format(endLAV - startLAV))
if display:
    El.Display(x, "x")

bTwoNorm = El.Nrm2(b)
bInfNorm = El.MaxNorm(b)
# r := b - A x
r = El.DistMultiVec()
El.Copy(b, r)
El.SparseMultiply(El.NORMAL, -1., A, x, 1., r)
if display:
    El.Display(r, "r")
rTwoNorm = El.Nrm2(r)
rOneNorm = El.EntrywiseNorm(r, 1)
if worldRank == 0:
    print("|| b ||_2       = {}".format(bTwoNorm))
    print("|| b ||_oo      = {}".format(bInfNorm))
    print("|| A x - b ||_2 = {}".format(rTwoNorm))
    print("|| A x - b ||_1 = {}".format(rOneNorm))

startLS = time.time()
xLS = El.LeastSquares(A, b)
endLS = time.time()
if worldRank == 0:
Beispiel #12
0
# Optionally draw manual starting vectors, then run the Mehrotra LP solver
# from copies of them and time the solve.
yOrig = El.DistMultiVec()
zOrig = El.DistMultiVec()
if manualInit:
    El.Uniform(xOrig, n, 1, 0.5, 0.4999)
    El.Uniform(yOrig, m, 1, 0.5, 0.4999)
    El.Uniform(zOrig, n, 1, 0.5, 0.4999)
x = El.DistMultiVec()
y = El.DistMultiVec()
z = El.DistMultiVec()

if testMehrotra:
    ctrl.approach = El.LP_MEHROTRA
    ctrl.mehrotraCtrl.primalInit = manualInit
    ctrl.mehrotraCtrl.dualInit = manualInit
    ctrl.mehrotraCtrl.progress = progress
    # The solver receives copies so the *Orig vectors stay unchanged.
    El.Copy(xOrig, x)
    El.Copy(yOrig, y)
    El.Copy(zOrig, z)
    startMehrotra = El.mpi.Time()
    El.LPDirect(A, b, c, x, y, z, ctrl)
    endMehrotra = El.mpi.Time()
    if worldRank == 0:
        # print() with one formatted string works on both Python 2 and 3.
        print("Mehrotra time: {}".format(endMehrotra - startMehrotra))

    if display:
        El.Display(x, "x Mehrotra")
        El.Display(y, "y Mehrotra")
        El.Display(z, "z Mehrotra")

    obj = El.Dot(c, x)
    if worldRank == 0:
Beispiel #13
0
# Sweep lambda linearly from startLambda to endLambda, run El.DS for each
# value, and report norms of x, of r = b - A x and of t = A^T r.
# With a single lambda the original divisor (numLambdas - 1) is zero; clamp
# it to 1 so that the only value used is startLambda.
lambdaSteps = max(numLambdas - 1., 1.)
# range() instead of xrange() keeps the loop valid on Python 3.
for j in range(numLambdas):
    lambd = startLambda + j * (endLambda - startLambda) / lambdaSteps
    if worldRank == 0:
        print("lambda = {}".format(lambd))

    # time.clock() was removed in Python 3.8; use wall-clock time instead.
    startDS = time.time()
    x = El.DS(A, b, lambd, ctrl)
    endDS = time.time()
    if worldRank == 0:
        print("DS time: {} seconds".format(endDS - startDS))
    if display:
        El.Display(x, "x")

    xOneNorm = El.EntrywiseNorm(x, 1)
    # r := b - A x
    r = El.DistMultiVec()
    El.Copy(b, r)
    El.SparseMultiply(El.NORMAL, -1., A, x, 1., r)
    rTwoNorm = El.Nrm2(r)
    # t := A^T r
    t = El.DistMultiVec()
    El.Zeros(t, 2 * n0 * n1, 1)
    El.SparseMultiply(El.TRANSPOSE, 1., A, r, 0., t)
    tTwoNorm = El.Nrm2(t)
    tInfNorm = El.MaxNorm(t)
    if display:
        El.Display(r, "r")
        El.Display(t, "t")
    if worldRank == 0:
        print("|| x ||_1       = {}".format(xOneNorm))
        print("|| b - A x ||_2 = {}".format(rTwoNorm))
        print("|| A^T (b - A x) ||_2 = {}".format(tTwoNorm))
        print("|| A^T (b - A x) ||_oo = {}".format(tInfNorm))
    s = 0
    row = 0
    for e in data[0].indptr[1:]:
        for idx in range(s, e):
            col = data[0].indices[idx]
            val = data[0].data[idx]
            X_ll.Set(row, col, val)
        s = e
        row = row + 1

    Y_ll = Y_cc.Matrix()
    for j in range(shape_X[0]):
        Y_ll.Set(j, 0, data[1][j] - 1)

# Redistribute the gathered data as [VC, STAR] matrices and build the
# regression right-hand side rY with one column per label value.
X = El.DistMatrix(colDist=El.VC, rowDist=El.STAR)
El.Copy(X_cc, X)
Y = El.DistMatrix(colDist=El.VC, rowDist=El.STAR)
El.Copy(Y_cc, Y)

if rank == 0:
    # print() with a single string works on both Python 2 and 3.
    print("Doing the regression...")

n = X.Height()
d = X.Width()

# Create right-hand side for the regression
# k is one more than the largest entry of Y; each row of rY is -1 everywhere
# except for a +1 in the column given by that row's entry of Y.
k = int(El.Max(Y)[0] + 1)
rY = El.DistMatrix(colDist=El.VC, rowDist=El.STAR)
rY.Resize(Y.Height(), k)
El.Fill(rY, -1.0)
for i in range(rY.LocalHeight()):
    rY.SetLocal(i, int(Y.GetLocal(i, 0)), 1.0)
Beispiel #15
0
# Report the GLM timing, the solution blocks X and Y, || Y ||_F, and the
# relative residual of D - A X - B Y.
endGLM = El.mpi.Time()
if worldRank == 0:
    # print() with one formatted string works on both Python 2 and 3.
    print("GLM time: {} seconds".format(endGLM - startGLM))
if display:
    El.Display(X, "X")
    El.Display(Y, "Y")
if output:
    El.Print(X, "X")
    El.Print(Y, "Y")

YNorm = El.FrobeniusNorm(Y)
if worldRank == 0:
    print("|| Y ||_F = {}".format(YNorm))

# E := D - A X - B Y
E = El.DistMultiVec()
El.Copy(D, E)
El.Multiply(El.NORMAL, -1., A, X, 1., E)
El.Multiply(El.NORMAL, -1., B, Y, 1., E)
residNorm = El.FrobeniusNorm(E)
if display:
    El.Display(E, "D - A X - B Y")
if output:
    El.Print(E, "D - A X - B Y")
if worldRank == 0:
    print("|| D - A X - B Y ||_F / || D ||_F = {}".format(residNorm / DNorm))


# Now try solving a weighted least squares problem
# (as lambda -> infinity, the exact solution converges to that of LSE)
def SolveWeighted(A, B, D, lambd):
    AScale = El.DistSparseMatrix()
Beispiel #16
0
# Run the long-only portfolio solve with regularization resolution disabled
ctrl.mehrotraCtrl.resolveReg = False
startLOP = El.mpi.Time()
x = El.LongOnlyPortfolio(d, F, c, gamma, ctrl)
endLOP = El.mpi.Time()
if worldRank == 0:
  elapsedLOP = endLOP-startLOP
  print('LOP time (no resolve reg. w/ equil): {} seconds'.format(elapsedLOP))
if display:
  El.Display(x, "x")

# Risk-adjusted return: c^T x - gamma x^T (D + F F^T) x
# =====================================================
e = El.DistMultiVec()
f = El.DistMultiVec()
# e := diag(d) x
El.Copy(x, e)
El.DiagonalScale(El.LEFT, El.NORMAL, d, e)
# f := F^T x
El.Zeros(f, r, 1)
El.Multiply(El.TRANSPOSE, 1., F, x, 0., f)
# e := F f + e
El.Multiply(El.NORMAL, 1., F, f, 1., e)
linearTerm = El.Dot(c, x)
quadraticTerm = El.Dot(x, e)
rar = linearTerm - gamma*quadraticTerm
if worldRank == 0:
  print('c^T x - gamma x^T (D + F F^T) x = {}'.format(rar))

xOneNorm = El.EntrywiseNorm(x, 1)
xTwoNorm = El.Nrm2(x)
if worldRank == 0:
  for _message, _value in (('|| x ||_1 = {}', xOneNorm),
                           ('|| x ||_2 = {}', xTwoNorm)):
    print(_message.format(_value))

El.Finalize()
Beispiel #17
0
s, z, orders, firstInds = ConstructPrimalDual(m)
n = s.Height()
if output:
    for _vec, _label in ((s, "s"), (z, "z"), (orders, "orders"),
                         (firstInds, "firstInds")):
        El.Print(_vec, _label)

# (Jordan) determinants of s and z, broadcast copies of them, and the
# number of non-SOC members in each vector
# ===================================================================
_coneArgs = (orders, firstInds, cutoff)
sDets = El.SOCDets(s, *_coneArgs)
zDets = El.SOCDets(z, *_coneArgs)
sDetsBcast = El.DistMultiVec()
zDetsBcast = El.DistMultiVec()
for _src, _dst in ((sDets, sDetsBcast), (zDets, zDetsBcast)):
    El.Copy(_src, _dst)
for _dst in (sDetsBcast, zDetsBcast):
    El.ConeBroadcast(_dst, *_coneArgs)
sNumNonPos = El.NumNonSOC(s, *_coneArgs)
zNumNonPos = El.NumNonSOC(z, *_coneArgs)
if output:
    for _vec, _label in ((sDets, "det(s)"), (zDets, "det(z)"),
                         (sDetsBcast, "Broadcasted det(s)"),
                         (zDetsBcast, "Broadcasted det(z)")):
        El.Print(_vec, _label)
    if worldRank == 0:
        print('# non-SOC in s: {}'.format(sNumNonPos))
        print('# non-SOC in z: {}'.format(zNumNonPos))

# Compute the square-roots of s and z
Beispiel #18
0
# Run El.LatticeImageAndKernel on a fresh copy of BOrig for every listed
# combination of presort / smallestFirst / deltaLower / weak, reporting the
# runtime and the kernel width each time.
if output:
    El.Print(BOrig, "B")

ctrl = El.LLLCtrl_d()
ctrl.progress = progress
ctrl.time = timeLLL

B = El.Matrix()
for presort, smallestFirst in (True, True), (True, False), (False, False):
    for deltaLower in 0.5, 0.75, 0.95, 0.98:
        for weak in True, False:

            # print() with one pre-formatted string works on Python 2 and 3.
            print("weak=%r, presort=%r, smallestFirst=%r, deltaLower=%f" %
                  (weak, presort, smallestFirst, deltaLower))

            # Restore the input before each run
            El.Copy(BOrig, B)

            ctrl.delta = deltaLower
            ctrl.weak = weak
            ctrl.presort = presort
            ctrl.smallestFirst = smallestFirst

            # Compute the image and kernel
            startTime = El.mpi.Time()
            M, K = El.LatticeImageAndKernel(B, ctrl)
            runTime = El.mpi.Time() - startTime
            print("  runtime: %f seconds" % runTime)
            # Two spaces after the colon reproduce the original output.
            print("  nullity:  {}".format(K.Width()))
            if output:
                El.Print(M, "Image")
                El.Print(K, "Kernel")
Beispiel #19
0
import math, El
k = 140  # matrix size
realRes = imagRes = 100  # grid resolution


def _log10_magnitude(entry):
    """Return log10(max(|entry|, 1)); entries of magnitude <= 1 map to 0."""
    return math.log10(max(abs(entry), 1))


# Build and show one instance of the Druinsky-Toledo matrix
A = El.DistMatrix()
El.DruinskyToledo(A, k)
El.Display(A, "Bunch-Kaufman growth matrix")

# Spectral portrait of that matrix
portrait, box = El.SpectralPortrait(A, realRes, imagRes)
El.DisplayPortrait(portrait, box, "spectral portrait of BK growth matrix")

# The LDL factorization below overwrites A, so keep a copy for the LU run
A_LU = El.DistMatrix()
El.Copy(A, A_LU)

# Pivoted LDL: show the subdiagonal of D and the log-scaled lower factor
dSub, p = El.LDL(A, False, El.BUNCH_KAUFMAN_A)
El.MakeTrapezoidal(El.LOWER, A)
El.Display(dSub, "Subdiagonal of D from LDL")
#P = El.DistMatrix(iTag,MC,MR,A.Grid())
# TODO: Construct P from p
#El.Display(P,"P")
El.EntrywiseMap(A, _log10_magnitude)
El.Display(A, "Logarithmically-scaled LDL triangular factor")

# Pivoted LU of the saved copy: show the permutation, then log-scale it
p_LU = El.LU(A_LU)
El.Display(p_LU, "LU permutation")
El.EntrywiseMap(A_LU, _log10_magnitude)
Beispiel #20
0
#-24  36 -46 -82 
# Fill the 4x4 test matrix and run El.LLL on a fresh copy for every listed
# combination of presorted / smallestFirst / deltaLower.
A = El.Matrix()
El.Zeros(A,4,4)
# Final entry values of the original element-by-element fill, row by row.
# NOTE(review): the original set entry (2,1) to 15 and then overwrote it
# with -15; only the effective value -15 is kept here - confirm that is the
# intended entry.
_rows = (
  ( -6,   9, -15, -18),
  (  4,  -6,  10,  12),
  ( 10, -15,  18,  35),
  (-24,  36, -46, -82),
)
for _i, _row in enumerate(_rows):
  for _j, _value in enumerate(_row):
    A.Set(_i,_j,_value)
El.Print(A)

B=El.Matrix()

for presorted, smallestFirst in (True,True), (True,False), (False,False):
  for deltaLower in 0.5, 0.75, 0.95, 0.98:
    # print() with one pre-formatted string works on Python 2 and 3.
    print("Testing with presorted=%r, smallestFirst=%r, deltaLower=%f" %
          (presorted,smallestFirst,deltaLower))
    # LLL works in place on B, so start from a fresh copy of A each time.
    El.Copy( A, B )
    El.LLL(B,deltaLower,0.,presorted,smallestFirst)
Beispiel #21
0
# Build (b,h) from generated (xGen,sGen) so that a primal feasible (x,s)
# exists
# ======================================================================
xGen = El.DistMatrix()
El.Gaussian(xGen, n, 1)

# b := A xGen
b = El.DistMatrix()
El.Zeros(b, m, 1)
El.Gemv(El.NORMAL, 1., A, xGen, 0., b)

# h := G xGen + sGen (h starts as a copy of sGen and G xGen is added to it)
sGen = El.DistMatrix()
El.Uniform(sGen, k, 1, 0.5, 0.5)
h = El.DistMatrix()
El.Copy(sGen, h)
El.Gemv(El.NORMAL, 1., G, xGen, 1., h)

# Build c from generated (yGen,zGen) so that a dual feasible (y,z) exists
# =======================================================================
yGen = El.DistMatrix()
El.Gaussian(yGen, m, 1)
zGen = El.DistMatrix()
El.Uniform(zGen, k, 1, 0.5, 0.5)

# c := -A^T yGen - G^T zGen, accumulated into a zero vector
c = El.DistMatrix()
El.Zeros(c, n, 1)
for _mat, _vec in ((A, yGen), (G, zGen)):
  El.Gemv(El.TRANSPOSE, -1., _mat, _vec, 1., c)

if display:
  El.Display(A, "A")
Beispiel #22
0
# Set up a random m x n regression problem, solve it with Elemental's
# least-squares routine, and compute the norm of the residual.
n = 300
t = 1000
#sketches = { "JLT" : sketch.JLT, "FJLT" : sketch.FJLT, "CWT" : sketch.CWT }
sketches = {"JLT": sketch.JLT, "CWT": sketch.CWT}

# Set up the random regression problem.
# The tag and the two distributions are separate positional arguments (as in
# the construction of x below); wrapping them in one tuple would pass the
# whole tuple as the datatype tag.
A = El.DistMatrix(El.dTag, El.VR, El.STAR)
El.Uniform(A, m, n)
b = El.DistMatrix(El.dTag, El.VR, El.STAR)
El.Uniform(b, m, 1)

# Solve using Elemental
# Elemental currently does not support LS on VR,STAR.
# So we copy.
A1 = El.DistMatrix()
El.Copy(A, A1)
b1 = El.DistMatrix()
El.Copy(b, b1)
x = El.DistMatrix(El.dTag, El.MC, El.MR)
El.Uniform(x, n, 1)
t0 = time.time()
El.LeastSquares(A1, b1, El.NORMAL, x)
telp = time.time() - t0

# Compute residual: r := b - A1 x
r = El.DistMatrix()
El.Copy(b, r)
El.Gemv(El.NORMAL, -1.0, A1, x, 1.0, r)
res = El.Norm(r)
if (MPI.COMM_WORLD.Get_rank() == 0):
    print "Exact solution residual %(res).3f\t\t\ttook %(elp).2e sec" % \