def __cngrnc(r, x, alpha = 1.0, trans = 'N', offsetx = 0):
    """
    In-place congruence transformation

        x := alpha * r * x * r' if trans = 'N'.

        x := alpha * r' * x * r if trans = 'T'.

    r is a square n x n-matrix.
    x is a square n x n-matrix or n^2-vector.  When offsetx is nonzero,
    the n x n block is read from and written to x starting at that
    offset, so x may be a longer vector holding several blocks.

    As the step comments below indicate, the product is formed from
    tril(x), i.e. x is treated as symmetric.
    """

    n = r.size[0]

    # Scale diagonal of x by 1/2.  The element count is given explicitly
    # so that exactly the n diagonal entries of the block at offsetx are
    # scaled; without it the strided scaling continues to the end of x
    # when x holds more than one block.
    blas.scal(0.5, x, n = n, inc = n+1, offset = offsetx)

    # a := r*tril(x) if trans is 'N',  a := tril(x)*r if trans is 'T'.
    a = +r
    if trans == 'N':
        blas.trmm(x, a, side = 'R', m = n, n = n, ldA = n, ldB = n,
            offsetA = offsetx)
    else:
        blas.trmm(x, a, side = 'L', m = n, n = n, ldA = n, ldB = n,
            offsetA = offsetx)

    # x := alpha * (a * r' + r * a')
    #    = alpha * r * (tril(x) + tril(x)') * r' if trans is 'N'.
    # x := alpha * (a' * r + r' * a)
    #    = alpha * r' * (tril(x)' + tril(x)) * r if trans = 'T'.
    blas.syr2k(r, a, x, trans = trans, alpha = alpha, n = n, k = n,
        ldB = n, ldC = n, offsetC = offsetx)
def llt(L):
    """
    Supernodal multifrontal Cholesky product:

    .. math::
         X = LL^T

    where :math:`L` is lower-triangular. On exit, the argument `L`
    contains the product :math:`X`.

    :param L:    :py:class:`cspmatrix` (factor)
    """
    assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"

    symb = L.symb
    snptr, chptr = symb.snptr, symb.chptr
    relptr, relidx = symb.relptr, symb.relidx
    blkptr = symb.blkptr
    values = L.blkval

    # update matrices produced by children, waiting for their parent
    pending = []

    for k in symb.snpost:

        ncols = snptr[k+1] - snptr[k]        # |Nk|
        nbelow = relptr[k+1] - relptr[k]     # |Ak|
        order = nbelow + ncols
        start = blkptr[k]

        # multiply the {Ak,Nk} block from the right by the triangular
        # {Nk,Nk} block, then form the frontal matrix
        # front := B*B' from the order x ncols block B of supernode k
        blas.trmm(values, values, side = "R", m = nbelow, n = ncols,
                  ldA = order, ldB = order,
                  offsetA = start, offsetB = start + ncols)
        front = matrix(0.0, (order, order))
        blas.syrk(values, front, n = order, k = ncols,
                  offsetA = start, ldA = order)

        # one stacked update matrix is consumed per child of supernode k
        for _ in range(chptr[k], chptr[k+1]):
            child_update, child = pending.pop()
            frontal_add_update(front, child_update, relidx, relptr, child)

        # a non-root supernode hands its trailing (2,2) block to its parent
        if nbelow > 0:
            update = matrix(0.0, (nbelow, nbelow))
            lapack.lacpy(front, update, m = nbelow, n = nbelow, uplo = 'L',
                         offsetA = ncols*order + ncols, ldA = order)
            pending.append((update, k))

        # the leading |Nk| columns of the frontal matrix replace the block
        lapack.lacpy(front, values, m = order, n = ncols, ldA = order,
                     uplo = 'L', ldB = order, offsetB = start)

    L._is_factor = False

    return
def cngrnc(r, x, alpha = 1.0):
    """
    Congruence transformation

        x := alpha * r'*x*r.

    r and x are square matrices.  x is overwritten with the result;
    as the step comments indicate, only tril(x) enters the product.
    """

    # Order of the matrices, taken from r so that the function does not
    # depend on a variable `n` defined somewhere outside of it.
    n = r.size[0]

    # Scale diagonal of x by 1/2.
    x[::n+1] *= 0.5

    # a := tril(x)*r
    a = +r
    tx = matrix(x, (n,n))
    blas.trmm(tx, a, side='L')

    # x := alpha*(a'*r + r'*a)
    #    = alpha * r' * (tril(x)' + tril(x)) * r
    blas.syr2k(r, a, tx, trans = 'T', alpha = alpha)
    x[:] = tx[:]
def cngrnc(r, x, alpha = 1.0):
    """
    Congruence transformation

        x := alpha * r'*x*r.

    r and x are square matrices.  x is overwritten with the result;
    as the step comments indicate, only tril(x) enters the product.
    """

    # Order of the matrices, taken from r so that the function does not
    # depend on a variable `n` defined somewhere outside of it.
    n = r.size[0]

    # Scale diagonal of x by 1/2.
    x[::n+1] *= 0.5

    # a := tril(x)*r
    a = +r
    tx = matrix(x, (n,n))
    blas.trmm(tx, a, side = 'L')

    # x := alpha*(a'*r + r'*a)
    #    = alpha * r' * (tril(x)' + tril(x)) * r
    blas.syr2k(r, a, tx, trans = 'T', alpha = alpha)
    x[:] = tx[:]
def __cngrnc(r, x, alpha=1.0, trans='N', offsetx=0):
    """
    In-place congruence transformation

        x := alpha * r * x * r' if trans = 'N'.

        x := alpha * r' * x * r if trans = 'T'.

    r is a square n x n-matrix.
    x is a square n x n-matrix or n^2-vector.  When offsetx is nonzero,
    the n x n block is read from and written to x starting at that
    offset, so x may be a longer vector holding several blocks.

    As the step comments below indicate, the product is formed from
    tril(x), i.e. x is treated as symmetric.
    """

    n = r.size[0]

    # Scale diagonal of x by 1/2.  The element count is given explicitly
    # so that exactly the n diagonal entries of the block at offsetx are
    # scaled; without it the strided scaling continues to the end of x
    # when x holds more than one block.
    blas.scal(0.5, x, n=n, inc=n + 1, offset=offsetx)

    # a := r*tril(x) if trans is 'N',  a := tril(x)*r if trans is 'T'.
    a = +r
    if trans == 'N':
        blas.trmm(x, a, side='R', m=n, n=n, ldA=n, ldB=n, offsetA=offsetx)
    else:
        blas.trmm(x, a, side='L', m=n, n=n, ldA=n, ldB=n, offsetA=offsetx)

    # x := alpha * (a * r' + r * a')
    #    = alpha * r * (tril(x) + tril(x)') * r' if trans is 'N'.
    # x := alpha * (a' * r + r' * a)
    #    = alpha * r' * (tril(x)' + tril(x)) * r if trans = 'T'.
    blas.syr2k(r,
               a,
               x,
               trans=trans,
               alpha=alpha,
               n=n,
               k=n,
               ldB=n,
               ldC=n,
               offsetC=offsetx)
def fsolve(x, y, z):
    """
    Solves the system of equations

        [ 0  G'*W^{-1}  ] [ ux ] = [ bx ]
        [ G  -W'        ] [ uz ]   [ bz ]

    x and z are overwritten in place; y is not used.  The names N, rr,
    q, H, r and W are taken from the enclosing scope.
    """
    scale0 = W['di'][0]

    # Compute bx := bx + G'*W^{-1}*W^{-T}*bz
    work = matrix(0., (N, 1))
    for col in range(N):
        start = N * col
        blas.symv(z, rr, work, ldA=N, offsetA=1, n=N, offsetx=start)
        x[col] += blas.dot(rr, work, n=N, offsetx=start)
    blas.symv(z, q, work, ldA=N, offsetA=1, n=N)
    x[N] += blas.dot(q, work) + z[0] * scale0**2

    # Solve G'*W^{-1}*W^{-T}*G*ux = bx
    lapack.potrs(H, x)

    # Compute bz := -W^{-T}*(bz-G*ux)

    # z -= G*x
    z[1::N + 1] -= x[:-1]
    z -= x[-1]

    # Apply scaling: the leading entry is scaled directly, the N x N
    # block starting at offset 1 by a congruence with r (sign flipped
    # through alpha = -1).
    z[0] *= -scale0
    blas.scal(0.5, z, n=N, offset=1, inc=N + 1)
    product = +r
    blas.trmm(z, product, ldA=N, offsetA=1, n=N, m=N)
    blas.syr2k(r, product, z, trans='T', offsetC=1, ldC=N, n=N, k=N,
               alpha=-1.0)
def cngrnc(r, x, n, xstart=0, alpha=1.0):
    """
    Congruence transformation

        x := alpha * r'*x*r.

    r is a square matrix, and x is a symmetric matrix stored in the
    n*n entries of x that begin at index xstart.  Those entries are
    overwritten with the result.

    The result corresponds to alpha*r.T*matrix(x, (n,n))*r.
    """
    stop = xstart + n * n

    # Halve the diagonal of the block so that tril(x) + tril(x)' = x.
    x[xstart:stop:n + 1] *= 0.5

    # product := tril(x)*r
    block = matrix(x[xstart:stop], (n, n))
    product = +r
    blas.trmm(block, product, side='L')

    # block := alpha*(product'*r + r'*product), then written back into x
    blas.syr2k(r, product, block, trans='T', alpha=alpha)
    x[xstart:stop] = block[:]
def __scale(L, Y, U, adj=False, inv=False, factored_updates=True):
    """
    Apply a supernodal scaling, in place, to the block values of every
    element of U.

    The supernodes described by L.symb are visited in reverse postorder.
    For each supernode k a dense frontal matrix F is assembled from the
    leading |Nk| columns of Y.blkval and, for non-root supernodes, from a
    matrix popped off an internal stack (copied into the 2,2 block of F).
    One entry per child of k is then pushed onto the stack.  Finally the
    block of each Ut in U that belongs to supernode k is overwritten:

      * inv is False: triangular solves (trsm) with the block of
        L.blkval, then a triangular product (trmm) with the 2,2 block
        of F;
      * otherwise: triangular products (trmm) with the block of
        L.blkval, then a triangular solve (trsm) with the 2,2 block
        of F.

    :param L:    object providing .symb and .blkval
                 (presumably a cspmatrix factor -- TODO confirm)
    :param Y:    object providing .blkval with the same block layout
                 (presumably a cspmatrix -- TODO confirm)
    :param U:    iterable of objects whose .blkval is modified in place
    :param adj:  selects the transposition flags; must be exactly True
                 or False, any other value leaves `tr`/`trns` unassigned
    :param inv:  selects between the two variants listed above
    :param factored_updates:  if true, the stacked matrices come from
                 frontal_get_update_factor; otherwise they come from
                 frontal_get_update and the 2,2 block of F is
                 Cholesky-factored here (lapack.potrf)
    """

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr

    # matrices handed from a supernode to its children
    stack = []

    for k in reversed(list(snpost)):

        nn = snptr[k + 1] - snptr[k]  # |Nk|
        na = relptr[k + 1] - relptr[k]  # |Ak|
        nj = na + nn

        # frontal matrix: leading nn columns (lower part) come from Y
        F = matrix(0.0, (nj, nj))
        lapack.lacpy(Y.blkval,
                     F,
                     m=nj,
                     n=nn,
                     ldA=nj,
                     offsetA=blkptr[k],
                     uplo='L')

        # if supernode k is not a root node:
        if na > 0:
            # copy Vk to 2,2 block of F
            Vk = stack.pop()
            lapack.lacpy(Vk,
                         F,
                         ldB=nj,
                         offsetB=nn * (nj + 1),
                         m=na,
                         n=na,
                         uplo='L')

        # if supernode k has any children:
        for ii in range(chptr[k], chptr[k + 1]):
            i = chidx[ii]
            if factored_updates:
                r = relidx[relptr[i]:relptr[i + 1]]
                stack.append(frontal_get_update_factor(F, r, nn, na))
            else:
                stack.append(frontal_get_update(F, relidx, relptr, i))

        # if supernode k is not a root node:
        if na > 0:
            if factored_updates:
                # In this case we have Vk = Lk'*Lk
                if adj is False:
                    trns = 'N'
                elif adj is True:
                    trns = 'T'
            else:
                # factorize Vk
                lapack.potrf(F, offsetA=nj * nn + nn, n=na, ldA=nj)
                # In this case we have Vk = Lk*Lk'
                if adj is False:
                    trns = 'T'
                elif adj is True:
                    trns = 'N'

        # transposition flags for the two operations with the block of L
        if adj is False:
            tr = ['T', 'N']
        elif adj is True:
            tr = ['N', 'T']

        if inv is False:
            for Ut in U:
                # symmetrize (1,1) block of Ut_{k} and scale
                U11 = matrix(0.0, (nn, nn))
                lapack.lacpy(Ut.blkval,
                             U11,
                             offsetA=blkptr[k],
                             m=nn,
                             n=nn,
                             ldA=nj,
                             uplo='L')
                U11 += U11.T
                U11[::nn + 1] *= 0.5
                lapack.lacpy(U11,
                             Ut.blkval,
                             offsetB=blkptr[k],
                             m=nn,
                             n=nn,
                             ldB=nj,
                             uplo='N')

                blas.trsm(L.blkval, Ut.blkval, side = 'R', transA = tr[0],\
                          m = nj, n = nn, offsetA = blkptr[k], ldA = nj,\
                          offsetB = blkptr[k], ldB = nj)
                blas.trsm(L.blkval, Ut.blkval, m = nn, n = nn, transA = tr[1],\
                          offsetA = blkptr[k], offsetB = blkptr[k],\
                          ldA = nj, ldB = nj)

                # zero-out strict upper triangular part of {Nj,Nj} block
                for i in range(1, nn):
                    blas.scal(0.0, Ut.blkval, offset=blkptr[k] + nj * i, n=i)

                if na > 0:
                    blas.trmm(F, Ut.blkval, m = na, n = nn, transA = trns,\
                              offsetA = nj*nn+nn, ldA = nj,\
                              offsetB = blkptr[k]+nn, ldB = nj)

        else:  # inv is True
            for Ut in U:
                # symmetrize (1,1) block of Ut_{k} and scale
                U11 = matrix(0.0, (nn, nn))
                lapack.lacpy(Ut.blkval,
                             U11,
                             offsetA=blkptr[k],
                             m=nn,
                             n=nn,
                             ldA=nj,
                             uplo='L')
                U11 += U11.T
                U11[::nn + 1] *= 0.5
                lapack.lacpy(U11,
                             Ut.blkval,
                             offsetB=blkptr[k],
                             m=nn,
                             n=nn,
                             ldB=nj,
                             uplo='N')

                blas.trmm(L.blkval, Ut.blkval, side = 'R', transA = tr[0],\
                          m = nj, n = nn, offsetA = blkptr[k], ldA = nj,\
                          offsetB = blkptr[k], ldB = nj)
                blas.trmm(L.blkval, Ut.blkval, m = nn, n = nn, transA = tr[1],\
                          offsetA = blkptr[k], offsetB = blkptr[k],\
                          ldA = nj, ldB = nj)

                # zero-out strict upper triangular part of {Nj,Nj} block
                for i in range(1, nn):
                    blas.scal(0.0, Ut.blkval, offset=blkptr[k] + nj * i, n=i)

                if na > 0:
                    blas.trsm(F, Ut.blkval, m = na, n = nn, transA = trns,\
                              offsetA = nj*nn+nn, ldA = nj,\
                              offsetB = blkptr[k]+nn, ldB = nj)

    return
def F(W):
    """
    Generate a solver for

                                         A'(uz0) = bx[0]
                                      -uz0 - uz1 = bx[1]
        A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0
                 - ux[1] - r1*r1' * uz1 * r1*r1' = bz1.

    uz0, uz1, bz0, bz1 are symmetric m x m-matrices.
    ux[0], bx[0] are n-vectors.
    ux[1], bx[1] are symmetric m x m-matrices.

    W is the scaling dictionary; only W['r'][0] and W['r'][1] are read
    here.  The work arrays L, U, Uti, Us, s, S, Asc, tmp, x1, the data A
    and the dimensions m, n, msq, mpckd come from the enclosing scope.
    The function returns the callable solve(x, y, z).

    We first calculate a congruence that diagonalizes r0*r0' and r1*r1':

        U' * r0 * r0' * U = I,  U' * r1 * r1' * U = S.

    We then make a change of variables

        usx[0] = ux[0],
        usx[1] = U' * ux[1] * U
          usz0 = U^-1 * uz0 * U^-T
          usz1 = U^-1 * uz1 * U^-T

    and define

          As() = U' * A() * U
        bsx[1] = U^-1 * bx[1] * U^-T
          bsz0 = U' * bz0 * U
          bsz1 = U' * bz1 * U.

    This gives

                           As'(usz0) = bx[0]
                        -usz0 - usz1 = bsx[1]
        As(usx[0]) - usx[1] - usz0   = bsz0
              -usx[1] - S * usz1 * S = bsz1.

    1. Eliminate usz0, usz1 using equations 3 and 4,

           usz0 = As(usx[0]) - usx[1] - bsz0
           usz1 = -S^-1 * (usx[1] + bsz1) * S^-1.

       This gives two equations in usx[0] and usx[1].

           As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0)

           -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1
               = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1.

    2. Eliminate usx[1] using equation 2:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

       i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j],

           usx[1] = ( S * As(usx[0]) * S ) ./ Gamma
                    + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma.

       This gives an equation in usx[0].

           As'( As(usx[0]) ./ Gamma )
               = bx0 + As'(bsz0) +
                 As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma )
               = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ).
    """

    # Calculate U s.t.
    #
    #     U' * r0*r0' * U = I,   U' * r1*r1' * U = diag(s).

    # Cholesky factorization r0 * r0' = L * L'
    blas.syrk(W['r'][0], L)
    lapack.potrf(L)

    # SVD L^-1 * r1 = U * diag(s) * V'
    blas.copy(W['r'][1], U)
    blas.trsm(L, U)
    lapack.gesvd(U, s, jobu='O')

    # s := s**2
    s[:] = s**2

    # Uti := U
    blas.copy(U, Uti)

    # U := L^-T * U
    blas.trsm(L, U, transA='T')

    # Uti := L * Uti = U^-T
    blas.trmm(L, Uti)

    # Us := U * diag(s)^-1
    blas.copy(U, Us)
    for i in range(m):
        blas.tbsv(s, Us, n=m, k=0, ldA=1, incx=m, offsetx=i)

    # S is m x m with lower triangular entries s[i] * s[j]
    # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j])
    # Upper triangular entries are undefined but nonzero.

    blas.scal(0.0, S)
    blas.syrk(s, S)
    Gamma = 1.0 + S
    sqrtG = sqrt(Gamma)

    # Asc[i] = (U' * Ai * U ) ./ sqrtG,  for i = 1, ..., n
    #        = Asi ./ sqrt(Gamma)
    blas.copy(A, Asc)
    misc.scale(
        Asc,  # only 'r' part of the dictionary is used
        {
            'dnl': matrix(0.0, (0, 1)),
            'dnli': matrix(0.0, (0, 1)),
            'd': matrix(0.0, (0, 1)),
            'di': matrix(0.0, (0, 1)),
            'v': [],
            'beta': [],
            'r': [U],
            'rti': [U]
        })
    for i in range(n):
        blas.tbsv(sqrtG, Asc, n=msq, k=0, ldA=1, offsetx=i * msq)

    # Convert columns of Asc to packed storage
    misc.pack2(Asc, {'l': 0, 'q': [], 's': [m]})

    # Cholesky factorization of Asc' * Asc.
    H = matrix(0.0, (n, n))
    blas.syrk(Asc, H, trans='T', k=mpckd)
    lapack.potrf(H)

    def solve(x, y, z):
        """

        1. Solve for usx[0]:

           Asc'(Asc(usx[0]))
               = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
               = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG)

           where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U,
           bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1

        2. Solve for usx[1]:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

           usx[1]
               = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
               = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                 + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

           Unscale ux[1] = Uti * usx[1] * Uti'

        3. Compute usz0, usz1

           r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
           r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T

        x and z are overwritten in place; y is not used.  z holds two
        consecutive blocks of msq entries (z0 first, then z1).
        """

        # z0 := U' * z0 * U
        #     = bsz0
        __cngrnc(U, z, trans='T')

        # z1 := Us' * bz1 * Us
        #     = S^-1 * U' * bz1 * U * S^-1
        #     = S^-1 * bsz1 * S^-1
        __cngrnc(Us, z, trans='T', offsetx=msq)

        # x[1] := Uti' * x[1] * Uti
        #       = bsx[1]
        __cngrnc(Uti, x[1], trans='T')

        # x[1] := x[1] - z[msq:]
        #       = bsx[1] - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], alpha=-1.0, offsetx=msq)

        # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
        #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
        #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
        # in packed storage
        blas.copy(x[1], x1)
        blas.tbmv(S, x1, n=msq, k=0, ldA=1)
        blas.axpy(z, x1, n=msq)
        blas.tbsv(sqrtG, x1, n=msq, k=0, ldA=1)
        misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

        # x[0] := x[0] + Asc'*x1
        #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
        #       = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma )
        blas.gemv(Asc, x1, x[0], m=mpckd, trans='T', beta=1.0)

        # x[0] := H^-1 * x[0]
        #       = ux[0]
        lapack.potrs(H, x[0])

        # x1 = Asc(x[0]) .* sqrtG  (unpacked)
        #    = As(x[0])
        blas.gemv(Asc, x[0], tmp, m=mpckd)
        misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
        blas.tbmv(sqrtG, x1, n=msq, k=0, ldA=1)

        # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
        #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
        #           ./ Gamma

        # x[1] := x[1] - z[:msq]
        #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], -1.0, n=msq)

        # x[1] := x[1] + x1
        #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(x1, x[1])

        # x[1] := x[1] / Gamma
        #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
        #       = S^-1 * usx[1] * S^-1
        blas.tbsv(Gamma, x[1], n=msq, k=0, ldA=1)

        # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1
        #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 *  U * r1
        #         := -r1' * uz1 * r1
        blas.axpy(x[1], z, n=msq, offsety=msq)
        blas.scal(-1.0, z, offset=msq)
        __cngrnc(U, z, offsetx=msq)
        __cngrnc(W['r'][1], z, trans='T', offsetx=msq)

        # x[1] :=  S * x[1] * S
        #       =  usx1
        blas.tbmv(S, x[1], n=msq, k=0, ldA=1)

        # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0
        #         = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0
        #         = r0' * U' *  usz0 * U * r0
        #         = r0' * uz0 * r0
        blas.axpy(x1, z, -1.0, n=msq)
        blas.scal(-1.0, z, n=msq)
        blas.axpy(x[1], z, -1.0, n=msq)
        __cngrnc(U, z)
        __cngrnc(W['r'][0], z, trans='T')

        # x[1] := Uti * x[1] * Uti'
        #       = ux[1]
        __cngrnc(Uti, x[1])

    return solve
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. 
From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q, offsetA = m) lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q) lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q, offsetA = m + (p+q+1)*q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset = m) lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q, offsetB = m) lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q, offsetB = m + (p+q+1)*q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q, ldC = p+q, offsetC = m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q, ldC = p+q, offsetB = q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q, beta = -1.0, ldA = p+q, ldC = p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p, ldB = p) blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q, ldC = p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 
# x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0) blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0) blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p, offsetA = q, offsetC = m, ldB = p+q, ldC = p+q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0, offsetA = q, offsetB = q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc = p+q+1, offset = m)
def trmm(L, B, alpha = 1.0, trans = 'N', nrhs = None, offsetB = 0, ldB = None):
    r"""
    Multiplication with sparse triangular matrix. Computes

    .. math::
        B &:= \alpha L B   \text{ if trans is 'N'} \\
        B &:= \alpha L^T B \text{ if trans is 'T'}

    where :math:`L` is a :py:class:`cspmatrix` factor.

    :param L:       :py:class:`cspmatrix` factor
    :param B:       matrix
    :param alpha:   float (default: 1.0)
    :param trans:   'N' or 'T' (default: 'N')
    :param nrhs:    number of right-hand sides (default: number of columns in :math:`B`)
    :param offsetB: integer (default: 0)
    :param ldB:     leading dimension of :math:`B` (default: number of rows in :math:`B`)

    :math:`B` is overwritten in place; nothing is returned.
    """

    assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"
    assert isinstance(B, matrix), "B must be a matrix"
    if ldB is None: ldB = B.size[0]
    if nrhs is None: nrhs = B.size[1]
    assert trans in ['N', 'T']

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    snode = L.symb.snode
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr
    blkval = L.blkval

    # without a stored permutation, rows of B are addressed directly
    p = L.symb.p
    if p is None: p = range(n)

    stack = []

    # compare by value: identity comparison ('is') against a string
    # literal depends on interning and is flagged by the interpreter
    if trans == 'N':

        # supernodes in postorder: children are processed before parents
        for k in snpost:

            nn = snptr[k+1]-snptr[k]       # |Nk|
            na = relptr[k+1]-relptr[k]     # |Ak|
            nj = na + nn

            # vertices of supernode k (same for every right-hand side)
            cols = snode[snptr[k]:snptr[k+1]]

            # extract and scale block from rhs
            Uk = matrix(0.0,(nj,nrhs))
            for j in range(nrhs):
                for i,ir in enumerate(cols):
                    Uk[i,j] = alpha*B[offsetB + j*ldB + p[ir]]

            blas.trmm(blkval, Uk, m = nn, n = nrhs, offsetA = blkptr[k], ldA = nj)

            if na > 0:
                # compute new contribution (to be stacked)
                blas.gemm(blkval, Uk, Uk, m = na, n = nrhs, k = nn, alpha = 1.0,\
                          offsetA = blkptr[k]+nn, ldA = nj, offsetC = nn)

            # add contributions from children
            for _ in range(chptr[k],chptr[k+1]):
                Ui, i = stack.pop()
                r = relidx[relptr[i]:relptr[i+1]]
                Uk[r,:] += Ui

            # if k is not a root node
            if na > 0: stack.append((Uk[nn:,:],k))

            # copy block to rhs
            for j in range(nrhs):
                for i,ir in enumerate(cols):
                    B[offsetB + j*ldB + p[ir]] = Uk[i,j]

    else: # trans is 'T'

        # supernodes in reverse postorder: parents are processed before
        # children
        for k in reversed(list(snpost)):

            nn = snptr[k+1]-snptr[k]       # |Nk|
            na = relptr[k+1]-relptr[k]     # |Ak|
            nj = na + nn

            # vertices of supernode k (same for every right-hand side)
            cols = snode[snptr[k]:snptr[k+1]]

            # extract and scale block from rhs
            Uk = matrix(0.0,(nj,nrhs))
            for j in range(nrhs):
                for i,ir in enumerate(cols):
                    Uk[i,j] = alpha*B[offsetB + j*ldB + p[ir]]

            # if k is not a root node
            if na > 0:
                Uk[nn:,:] = stack.pop()

            # stack contributions for children
            for ii in range(chptr[k],chptr[k+1]):
                i = chidx[ii]
                stack.append(Uk[relidx[relptr[i]:relptr[i+1]],:])

            if na > 0:
                blas.gemm(blkval, Uk, Uk, alpha = 1.0, beta = 1.0, m = nn, n = nrhs, k = na,\
                          transA = 'T', offsetA = blkptr[k]+nn, ldA = nj, offsetB = nn)

            # scale and copy block to rhs
            blas.trmm(blkval, Uk, transA = 'T', m = nn, n = nrhs, offsetA = blkptr[k], ldA = nj)
            for j in range(nrhs):
                for i,ir in enumerate(cols):
                    B[offsetB + j*ldB + p[ir]] = Uk[i,j]

    return
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. 
From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m) lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q) lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p + q, offsetA=m + (p + q + 1) * q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n=m, k=0, ldA=1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset=m) lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m) lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p + q, offsetB=m + (p + q + 1) * q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side='L', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans='T', n=p + q, k=p + q, ldB=p + q, ldC=p + q, offsetC=m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side='R', m=p, n=p + q, ldA=p + q, ldC=p + q, offsetB=q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p + q, beta=-1.0, ldA=p + q, ldC=p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p) blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 
# x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0) blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0) blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p + q, k=p, offsetA=q, offsetC=m, ldB=p + q, ldC=p + q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side='R', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q, offsetB=q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc=p + q + 1, offset=m)
def spmatrix(self, reordered = True, symmetric = False):
    """
    Converts the :py:class:`cspmatrix` :math:`A` to a sparse matrix.

    By default (`reordered` is `True`) the matrix is returned in the
    reordered numbering; with `reordered = False` the inverse
    permutation is applied first. With `symmetric = False` (default)
    only the lower triangle is returned.

    For a :py:class:`cspmatrix` that represents a Cholesky factor only
    the default options are supported; any other combination raises
    :py:class:`ValueError`.

    :param reordered:  boolean (default: True)
    :param symmetric:  boolean (default: False)
    """
    symb = self.symb
    n = symb.n
    snptr = symb.snptr
    snode = symb.snode
    relptr = symb.relptr
    snrowidx = symb.snrowidx
    sncolptr = symb.sncolptr
    blkptr = symb.blkptr
    blkval = self.blkval

    if self.is_factor:
        if symmetric:
            raise ValueError("'symmetric = True' not implemented for Cholesky factors")
        if not reordered:
            raise ValueError("'reordered = False' not implemented for Cholesky factors")

        # work on a copy: the {Ak,Nk} block of every non-root supernode
        # is multiplied from the right by its {Nk,Nk} block
        blkval = +blkval
        for k in symb.snpost:
            width = snptr[k+1] - snptr[k]        # |Nk|
            below = relptr[k+1] - relptr[k]      # |Ak|
            if below == 0:
                continue
            height = below + width
            start = blkptr[k]
            if width == 1:
                # single column: the triangular block is a scalar
                blas.scal(blkval[start], blkval, offset = start + 1, n = below)
            else:
                blas.trmm(blkval, blkval, transA = "N", diag = "N",
                          side = "R", uplo = "L",
                          m = below, n = width, ldA = height, ldB = height,
                          offsetA = start, offsetB = start + width)

    # number of stored entries in each column
    counts = matrix(0, (n, 1))
    for k in range(symb.Nsn):
        width = snptr[k+1] - snptr[k]
        height = width + (relptr[k+1] - relptr[k])
        for offset in range(width):
            counts[snode[snptr[k] + offset]] = height - offset

    # column pointer (running sum of the counts)
    colptr = [0]
    for col in range(n):
        colptr.append(colptr[-1] + counts[col])
    colptr = matrix(colptr)

    # values and row indices, column by column
    values = matrix(0.0, (colptr[-1], 1))
    rows = matrix(0, (colptr[-1], 1))
    for k in range(symb.Nsn):
        width = snptr[k+1] - snptr[k]
        height = width + (relptr[k+1] - relptr[k])
        for offset in range(width):
            col = snode[snptr[k] + offset]
            blas.copy(blkval, values,
                      offsetx = blkptr[k] + height*offset + offset,
                      offsety = colptr[col], n = height - offset)
            rows[colptr[col]:colptr[col+1]] = snrowidx[sncolptr[k] + offset:sncolptr[k+1]]

    # triplet description of the matrix
    I = []
    J = []
    for col in range(n):
        I += list(rows[colptr[col]:colptr[col+1]])
        J += (colptr[col+1] - colptr[col])*[col]

    lower = spmatrix(values, I, J, (n, n))   # reordered and lower triangular

    if reordered or self.symb.p is None:
        # reordered matrix (do not apply inverse permutation)
        return symmetrize(lower) if symmetric else lower

    # apply inverse permutation
    full = perm(symmetrize(lower), self.symb.ip)
    return full if symmetric else tril(full)
def trmm(L, B, alpha=1.0, trans='N', nrhs=None, offsetB=0, ldB=None):
    r"""
    Multiplication with sparse triangular matrix. Computes

    .. math::

        B &:= \alpha L B   \text{ if trans is 'N'} \\
        B &:= \alpha L^T B \text{ if trans is 'T'}

    where :math:`L` is a :py:class:`cspmatrix` factor. :math:`B` is
    overwritten in place; nothing is returned.

    :param L:        :py:class:`cspmatrix` factor
    :param B:        matrix
    :param alpha:    float (default: 1.0)
    :param trans:    'N' or 'T' (default: 'N')
    :param nrhs:     number of right-hand sides (default: number of columns in :math:`B`)
    :param offsetB:  integer (default: 0)
    :param ldB:      leading dimension of :math:`B` (default: number of rows in :math:`B`)
    """
    assert isinstance(L, cspmatrix) and L.is_factor is True, \
        "L must be a cspmatrix factor"
    assert isinstance(B, matrix), "B must be a matrix"
    if ldB is None:
        ldB = B.size[0]
    if nrhs is None:
        nrhs = B.size[1]
    assert trans in ['N', 'T']

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    snode = L.symb.snode
    chptr = L.symb.chptr
    chidx = L.symb.chidx
    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr
    blkval = L.blkval

    # Without a permutation the rows of B are addressed directly.
    p = L.symb.p
    if p is None:
        p = range(n)

    def gather(cols, nj):
        # Build the nj x nrhs work block whose leading rows hold alpha times
        # the rows of B that belong to the supernode columns `cols`.
        Uk = matrix(0.0, (nj, nrhs))
        for j in range(nrhs):
            for i, ir in enumerate(cols):
                Uk[i, j] = alpha * B[offsetB + j * ldB + p[ir]]
        return Uk

    def scatter(cols, Uk):
        # Write the leading rows of the work block back into B.
        for j in range(nrhs):
            for i, ir in enumerate(cols):
                B[offsetB + j * ldB + p[ir]] = Uk[i, j]

    stack = []

    # Compare by value: `trans is 'N'` only worked by accident of string
    # interning and triggers a SyntaxWarning on Python >= 3.8.
    if trans == 'N':

        for k in snpost:

            nn = snptr[k + 1] - snptr[k]       # |Nk|
            na = relptr[k + 1] - relptr[k]     # |Ak|
            nj = na + nn
            cols = snode[snptr[k]:snptr[k + 1]]

            # extract and scale block from rhs
            Uk = gather(cols, nj)
            blas.trmm(blkval, Uk, m=nn, n=nrhs, offsetA=blkptr[k], ldA=nj)

            if na > 0:
                # compute new contribution (to be stacked)
                blas.gemm(blkval, Uk, Uk, m=na, n=nrhs, k=nn, alpha=1.0,
                          offsetA=blkptr[k] + nn, ldA=nj, offsetC=nn)

            # add contributions from children
            for _ in range(chptr[k], chptr[k + 1]):
                Ui, i = stack.pop()
                r = relidx[relptr[i]:relptr[i + 1]]
                Uk[r, :] += Ui

            # if k is not a root node
            if na > 0:
                stack.append((Uk[nn:, :], k))

            # copy block to rhs
            scatter(cols, Uk)

    else:  # trans is 'T'

        for k in reversed(list(snpost)):

            nn = snptr[k + 1] - snptr[k]       # |Nk|
            na = relptr[k + 1] - relptr[k]     # |Ak|
            nj = na + nn
            cols = snode[snptr[k]:snptr[k + 1]]

            # extract and scale block from rhs
            Uk = gather(cols, nj)

            # if k is not a root node
            if na > 0:
                Uk[nn:, :] = stack.pop()

            # stack contributions for children
            for ii in range(chptr[k], chptr[k + 1]):
                i = chidx[ii]
                stack.append(Uk[relidx[relptr[i]:relptr[i + 1]], :])

            if na > 0:
                blas.gemm(blkval, Uk, Uk, alpha=1.0, beta=1.0,
                          m=nn, n=nrhs, k=na, transA='T',
                          offsetA=blkptr[k] + nn, ldA=nj, offsetB=nn)

            # scale and copy block to rhs
            blas.trmm(blkval, Uk, transA='T', m=nn, n=nrhs,
                      offsetA=blkptr[k], ldA=nj)
            scatter(cols, Uk)

    return
def F(W):
    """
    Generate a solver for

        A'(uz0) = bx[0]
        -uz0 - uz1 = bx[1]
        A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0
                 - ux[1] - r1*r1' * uz1 * r1*r1' = bz1.

    uz0, uz1, bz0, bz1 are symmetric m x m-matrices.
    ux[0], bx[0] are n-vectors.
    ux[1], bx[1] are symmetric m x m-matrices.

    W is a scaling dictionary; only W['r'][0] (r0) and W['r'][1] (r1) are
    read here.  The function returns the nested function solve(x, y, z).

    NOTE(review): L, U, s, Uti, Us, S, A, Asc, tmp, x1, m, n, msq and
    mpckd are not defined in this block; presumably they are workspaces
    and dimensions from the enclosing scope (msq looks like m**2 and mpckd
    like the packed length m*(m+1)/2) -- confirm against the caller.

    We first calculate a congruence that diagonalizes r0*r0' and r1*r1':

        U' * r0 * r0' * U = I,  U' * r1 * r1' * U = S.

    We then make a change of variables

        usx[0] = ux[0],
        usx[1] = U' * ux[1] * U
          usz0 = U^-1 * uz0 * U^-T
          usz1 = U^-1 * uz1 * U^-T

    and define

          As() = U' * A() * U
        bsx[1] = U^-1 * bx[1] * U^-T
          bsz0 = U' * bz0 * U
          bsz1 = U' * bz1 * U.

    This gives

        As'(usz0) = bx[0]
        -usz0 - usz1 = bsx[1]
        As(usx[0]) - usx[1] - usz0 = bsz0
        -usx[1] - S * usz1 * S = bsz1.

    1. Eliminate usz0, usz1 using equations 3 and 4,

           usz0 = As(usx[0]) - usx[1] - bsz0
           usz1 = -S^-1 * (usx[1] + bsz1) * S^-1.

       This gives two equations in usx[0] and usx[1].

           As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0)

           -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1
               = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1.

    2. Eliminate usx[1] using equation 2:

           usx[1] + S * usx[1] * S
               = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

       i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j],

           usx[1] = ( S * As(usx[0]) * S ) ./ Gamma
               + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma.

       This gives an equation in usx[0].

           As'( As(usx[0]) ./ Gamma )
               = bx0 + As'(bsz0) +
                 As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma )
               = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ).
    """

    # Calculate U s.t.
    #
    #     U' * r0*r0' * U = I,   U' * r1*r1' * U = diag(s).

    # Cholesky factorization r0 * r0' = L * L'
    blas.syrk(W['r'][0], L)
    lapack.potrf(L)

    # SVD L^-1 * r1 = U * diag(s) * V'
    blas.copy(W['r'][1], U)
    blas.trsm(L, U)
    lapack.gesvd(U, s, jobu = 'O')

    # s := s**2
    s[:] = s**2

    # Uti := U
    blas.copy(U, Uti)

    # U := L^-T * U
    blas.trsm(L, U, transA = 'T')

    # Uti := L * Uti = U^-T
    blas.trmm(L, Uti)

    # Us := U * diag(s)^-1
    # (each call scales one row of Us, stepping through it with stride m)
    blas.copy(U, Us)
    for i in range(m):
        blas.tbsv(s, Us, n = m, k = 0, ldA = 1, incx = m, offsetx = i)

    # S is m x m with lower triangular entries s[i] * s[j]
    # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j])
    # Upper triangular entries are undefined but nonzero.

    blas.scal(0.0, S)
    blas.syrk(s, S)
    Gamma = 1.0 + S
    sqrtG = sqrt(Gamma)

    # Asc[i] = (U' * Ai * U ) ./  sqrtG,  for i = 1, ..., n
    #        = Asi ./ sqrt(Gamma)
    blas.copy(A, Asc)
    misc.scale(Asc,  # only 'r' part of the dictionary is used
        {'dnl': matrix(0.0, (0, 1)),
         'dnli': matrix(0.0, (0, 1)),
         'd': matrix(0.0, (0, 1)),
         'di': matrix(0.0, (0, 1)),
         'v': [],
         'beta': [],
         'r': [ U ],
         'rti': [ U ]})
    # Elementwise division of every column of Asc by sqrtG.
    for i in range(n):
        blas.tbsv(sqrtG, Asc, n = msq, k = 0, ldA = 1, offsetx = i*msq)

    # Convert columns of Asc to packed storage
    misc.pack2(Asc, {'l': 0, 'q': [], 's': [ m ]})

    # Cholesky factorization of Asc' * Asc.
    H = matrix(0.0, (n, n))
    blas.syrk(Asc, H, trans = 'T', k = mpckd)
    lapack.potrf(H)

    def solve(x, y, z):
        """
        Solve the system for the right-hand side held in x and z,
        overwriting x[0], x[1] and z in place.  The argument y is not
        used.  Nothing is returned.

        1.  Solve for usx[0]:

            Asc'(Asc(usx[0]))
                = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
                = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG)

            where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U,
            bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1

        2.  Solve for usx[1]:

            usx[1] + S * usx[1] * S
                = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1

            usx[1]
                = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma
                = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                    + (bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma
                = -bsz0 + (S * As(usx[0]) * S) ./ Gamma
                    + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) ./ Gamma

            Unscale ux[1] = Uti * usx[1] * Uti'

        3.  Compute usz0, usz1

            r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T
            r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 )  * r1^-T
        """

        # z0 := U' * z0 * U
        #     = bsz0
        __cngrnc(U, z, trans = 'T')

        # z1 := Us' * bz1 * Us
        #     = S^-1 * U' * bz1 * U * S^-1
        #     = S^-1 * bsz1 * S^-1
        __cngrnc(Us, z, trans = 'T', offsetx = msq)

        # x[1] := Uti' * x[1] * Uti
        #       = bsx[1]
        __cngrnc(Uti, x[1], trans = 'T')

        # x[1] := x[1] - z[msq:]
        #       = bsx[1] - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], alpha = -1.0, offsetx = msq)

        # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG
        #    = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG
        #    = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG
        # in packed storage
        blas.copy(x[1], x1)
        blas.tbmv(S, x1, n = msq, k = 0, ldA = 1)
        blas.axpy(z, x1, n = msq)
        blas.tbsv(sqrtG, x1, n = msq, k = 0, ldA = 1)
        misc.pack2(x1, {'l': 0, 'q': [], 's': [m]})

        # x[0] := x[0] + Asc'*x1
        #       = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG)
        #       = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma )
        blas.gemv(Asc, x1, x[0], m = mpckd, trans = 'T', beta = 1.0)

        # x[0] := H^-1 * x[0]
        #       = ux[0]
        lapack.potrs(H, x[0])

        # x1 = Asc(x[0]) .* sqrtG  (unpacked)
        #    = As(x[0])
        blas.gemv(Asc, x[0], tmp, m = mpckd)
        misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]})
        blas.tbmv(sqrtG, x1, n = msq, k = 0, ldA = 1)

        # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2
        #        = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1)
        #           ./ Gamma

        # x[1] := x[1] - z[:msq]
        #       = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(z, x[1], -1.0, n = msq)

        # x[1] := x[1] + x1
        #       = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1
        blas.axpy(x1, x[1])

        # x[1] := x[1] / Gamma
        #       = (As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 ) / Gamma
        #       = S^-1 * usx[1] * S^-1
        blas.tbsv(Gamma, x[1], n = msq, k = 0, ldA = 1)

        # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1
        #         := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 *  U * r1
        #         := -r1' * uz1 * r1
        blas.axpy(x[1], z, n = msq, offsety = msq)
        blas.scal(-1.0, z, offset = msq)
        __cngrnc(U, z, offsetx = msq)
        __cngrnc(W['r'][1], z, trans = 'T', offsetx = msq)

        # x[1] :=  S * x[1] * S
        #       =  usx1
        blas.tbmv(S, x[1], n = msq, k = 0, ldA = 1)

        # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0
        #         = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0
        #         = r0' * U' *  usz0 * U * r0
        #         = r0' * uz0 * r0
        blas.axpy(x1, z, -1.0, n = msq)
        blas.scal(-1.0, z, n = msq)
        blas.axpy(x[1], z, -1.0, n = msq)
        __cngrnc(U, z)
        __cngrnc(W['r'][0], z, trans = 'T')

        # x[1] :=  Uti * x[1] * Uti'
        #       =  ux[1]
        __cngrnc(Uti, x[1])

    return solve
def spmatrix(self, reordered=True, symmetric=False):
    """
    Converts the :py:class:`cspmatrix` :math:`A` to a sparse matrix.

    With `reordered=True` (the default) the result is returned in the
    reordered (permuted) ordering; with `reordered=False` the inverse
    permutation is applied first. With `symmetric=False` (the default)
    only the lower triangle is returned, otherwise the symmetrized matrix.

    If the :py:class:`cspmatrix` represents a Cholesky factor, only the
    default options are accepted and a :py:exc:`ValueError` is raised
    otherwise.

    :param reordered:  boolean (default: True)
    :param symmetric:  boolean (default: False)
    """
    symb = self.symb
    order = symb.n
    snptr, snode, relptr = symb.snptr, symb.snode, symb.relptr
    snrowidx, sncolptr, blkptr = symb.snrowidx, symb.sncolptr, symb.blkptr
    values = self.blkval

    if self.is_factor:
        if symmetric:
            raise ValueError(
                "'symmetric = True' not implemented for Cholesky factors")
        if not reordered:
            raise ValueError(
                "'reordered = False' not implemented for Cholesky factors")

        # Work on a copy: the off-diagonal part of every supernode is
        # multiplied from the right by its (lower triangular) diagonal block.
        values = +values
        for sn in symb.snpost:
            width = snptr[sn + 1] - snptr[sn]        # |Nk|
            below = relptr[sn + 1] - relptr[sn]      # |Ak|
            if below == 0:
                continue
            height = below + width
            start = blkptr[sn]
            if width == 1:
                blas.scal(values[start], values, offset=start + 1, n=below)
            else:
                blas.trmm(values, values, transA="N", diag="N",
                          side="R", uplo="L",
                          m=below, n=width, ldA=height, ldB=height,
                          offsetA=start, offsetB=start + width)

    # Number of stored entries (on and below the diagonal) in each column.
    colcount = matrix(0, (order, 1))
    for sn in range(symb.Nsn):
        width = snptr[sn + 1] - snptr[sn]
        height = width + (relptr[sn + 1] - relptr[sn])
        for offset in range(width):
            colcount[snode[snptr[sn] + offset]] = height - offset

    # Column pointers: running sum of the column counts.
    running = [0]
    for col in range(order):
        running.append(running[-1] + colcount[col])
    colptr = matrix(running)

    # Copy numerical values and row indices column by column.
    nnz = colptr[-1]
    val = matrix(0.0, (nnz, 1))
    rowidx = matrix(0, (nnz, 1))
    for sn in range(symb.Nsn):
        width = snptr[sn + 1] - snptr[sn]
        height = width + (relptr[sn + 1] - relptr[sn])
        for offset in range(width):
            col = snode[snptr[sn] + offset]
            blas.copy(values, val,
                      offsetx=blkptr[sn] + height * offset + offset,
                      offsety=colptr[col], n=height - offset)
            rowidx[colptr[col]:colptr[col + 1]] = \
                snrowidx[sncolptr[sn] + offset:sncolptr[sn + 1]]

    # Triplet description of the lower triangle.
    rows = [r for col in range(order)
            for r in rowidx[colptr[col]:colptr[col + 1]]]
    cols = [col for col in range(order)
            for _ in range(colptr[col + 1] - colptr[col])]

    # `lower` is reordered and lower triangular.
    lower = spmatrix(val, rows, cols, (order, order))

    if reordered or symb.p is None:
        # reordered matrix (do not apply inverse permutation)
        return symmetrize(lower) if symmetric else lower

    # apply inverse permutation
    full = perm(symmetrize(lower), symb.ip)
    return full if symmetric else tril(full)
def scale(x, W, trans='N', inverse='N'):
    """
    Apply the Nesterov-Todd scaling, or its inverse, to x in place.

    Depending on the flags, x is overwritten with

         W*x         (trans is 'N', inverse = 'N')
         W^T*x       (trans is 'T', inverse = 'N')
         W^{-1}*x    (trans is 'N', inverse = 'I')
         W^{-T}*x    (trans is 'T', inverse = 'I').

    x is a dense 'd' matrix; each of its columns is scaled separately.

    W is a dictionary with entries:

    - W['dnl']: positive vector
    - W['dnli']: componentwise inverse of W['dnl']
    - W['d']: positive vector
    - W['di']: componentwise inverse of W['d']
    - W['v']: lists of 2nd order cone vectors with unit hyperbolic norms
    - W['beta']: list of positive numbers
    - W['r']: list of square matrices
    - W['rti']: list of square matrices.  rti[k] is the inverse transpose
      of r[k].

    The 'dnl' and 'dnli' entries are optional, and only present when the
    function is called from the nonlinear solver.
    """

    from cvxopt import blas

    nrows, ncols = x.size[0], x.size[1]
    forward = inverse == 'N'     # selects d / dnl / r versus di / dnli / rti
    undo_q = inverse == 'I'      # selects the inverse 'q' scaling

    def apply_diagonal(d, start):
        # Multiply rows start, ..., start + len(d) - 1 of every column of x
        # componentwise by d; return len(d).
        for col in range(ncols):
            blas.tbmv(d, x, n=d.size[0], k=0, ldA=1,
                      offsetx=col * nrows + start)
        return d.size[0]

    ind = 0

    # Nonlinear component: xk := dnl .* xk, or dnli .* xk for the inverse.
    if 'dnl' in W:
        dnl = W['dnl'] if forward else W['dnli']
        ind += apply_diagonal(dnl, ind)

    # Linear 'l' component: xk := d .* xk, or di .* xk for the inverse.
    diag = W['d'] if forward else W['di']
    ind += apply_diagonal(diag, ind)

    # 'q' components.  The scaling is
    #
    #     xk := beta * (2*v*v' - J) * xk
    #         = beta * (2*v*(xk'*v)' - J*xk)
    #
    # with beta = W['beta'][k], v = W['v'][k], J = [1, 0; 0, -I], and the
    # inverse scaling is
    #
    #     xk := 1/beta * (2*J*v*v'*J - J) * xk
    #         = 1/beta * (-J) * (2*v*((-J*xk)'*v)' + xk).
    work = matrix(0.0, (ncols, 1))
    for k, v in enumerate(W['v']):
        m = v.size[0]
        if undo_q:
            blas.scal(-1.0, x, offset=ind, inc=nrows)
        blas.gemv(x, v, work, trans='T', m=m, n=ncols, offsetA=ind,
                  ldA=nrows)
        blas.scal(-1.0, x, offset=ind, inc=nrows)
        blas.ger(v, work, x, alpha=2.0, m=m, n=ncols, ldA=nrows,
                 offsetA=ind)
        if undo_q:
            blas.scal(-1.0, x, offset=ind, inc=nrows)
            factor = 1.0 / W['beta'][k]
        else:
            factor = W['beta'][k]
        for col in range(ncols):
            blas.scal(factor, x, n=m, offset=ind + col * nrows)
        ind += m

    # 's' components.  With r the kth element of W['r'] the scaling is
    #
    #     xk := vec( r' * mat(xk) * r )  if trans = 'N'
    #     xk := vec( r * mat(xk) * r' )  if trans = 'T',
    #
    # and with rti the kth element of W['rti'] the inverse scaling is
    #
    #     xk := vec( rti * mat(xk) * rti' )  if trans = 'N'
    #     xk := vec( rti' * mat(xk) * rti )  if trans = 'T'.
    orders = [0] + [r.size[0] for r in W['r']]
    largest = max(orders)
    prod = matrix(0.0, (largest, largest))
    for k in range(len(W['r'])):
        if forward:
            r = W['r'][k]
            t = 'T' if trans == 'N' else 'N'
        else:
            r = W['rti'][k]
            t = trans

        n = r.size[0]
        side = 'R' if t == 'N' else 'L'
        for col in range(ncols):
            base = ind + col * nrows

            # scale diagonal of xk by 0.5
            blas.scal(0.5, x, offset=base, inc=n + 1, n=n)

            # prod = r*tril(x) (t is 'N') or prod = tril(x)*r (t is 'T')
            blas.copy(r, prod)
            blas.trmm(x, prod, side=side, m=n, n=n, ldA=n, ldB=n,
                      offsetA=base)

            # x := (r*prod' + prod*r')  if t is 'N'
            # x := (r'*prod + prod'*r)  if t is 'T'
            blas.syr2k(r, prod, x, trans=t, n=n, k=n, ldB=n, ldC=n,
                       offsetC=base)

        ind += n ** 2
pylab.figure(1, facecolor='w', figsize=(6,6)) pylab.plot(V[0,:], V[1,:],'ow', mec='k') pylab.plot([0], [0], 'k+') I = [ k for k in range(n) if xd[k] > 1e-5 ] pylab.plot(V[0,I], V[1,I],'or') # Enclosing ellipse is {x | x' * (V*diag(xe)*V')^-1 * x = sqrt(2)} nopts = 1000 angles = matrix( [ a*2.0*pi/nopts for a in range(nopts) ], (1,nopts) ) circle = matrix(0.0, (2,nopts)) circle[0,:], circle[1,:] = cos(angles), sin(angles) W = V * spdiag(xd) * V.T lapack.potrf(W) ellipse = sqrt(2.0) * circle blas.trmm(W, ellipse) if pylab_installed: pylab.plot(ellipse[0,:].T, ellipse[1,:].T, 'k--') pylab.axis([-5, 5, -5, 5]) pylab.title('D-optimal design (fig. 7.9)') pylab.axis('off') # E-design. # # maximize w # subject to w*I <= V*diag(x)*V' # x >= 0 # sum(x) = 1 novars = n+1