def g(x, y, z): # Solve # # [ K d3 ] [ ux_y ] # [ ] [ ] = # [ d3' 1'*d3 ] [ ux_b ] # # [ bx_y ] [ D ] # [ ] - [ ] * D3 * (D2 * bx_v + bx_z - bx_w). # [ bx_b ] [ d' ] x[:N] -= mul(d, mul(d3, mul(d2, x[-N:]) + z[:N] - z[-N:])) x[N] -= blas.dot(d, mul(d3, mul(d2, x[-N:]) + z[:N] - z[-N:])) # Solve dy1 := K^-1 * x[:N] blas.copy(x, dy1, n=N) chompack.trsm(L, dy1, trans='N') chompack.trsm(L, dy1, trans='T') # Find ux_y = dy1 - ux_b * dy2 s.t # # d3' * ( dy1 - ux_b * dy2 + ux_b ) = x[N] # # i.e. x[N] := ( x[N] - d3'* dy1 ) / ( d3'* ( 1 - dy2 ) ). x[N] = ( x[N] - blas.dot(d3, dy1) ) / \ ( blas.asum(d3) - blas.dot(d3, dy2) ) x[:N] = dy1 - x[N] * dy2 # ux_v = D4 * ( bx_v - D1^-1 (bz_z + D * (ux_y + ux_b)) # - D2^-1 * bz_w ) x[-N:] = mul( d4, x[-N:] - div(z[:N] + mul(d, x[:N] + x[N]), d1) - div(z[N:], d2)) # uz_z = - D1^-1 * ( bx_z - D * ( ux_y + ux_b ) - ux_v ) # uz_w = - D2^-1 * ( bx_w - uz_w ) z[:N] += base.mul(d, x[:N] + x[N]) + x[-N:] z[-N:] += x[-N:] blas.scal(-1.0, z) # Return W['di'] * uz blas.tbmv(W['di'], z, n=2 * N, k=0, ldA=1)
def solve(x, y, z): """ 1. Solve for usx[0]: Asc'(Asc(usx[0])) = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG) where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U, bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1 2. Solve for usx[1]: usx[1] + S * usx[1] * S = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 usx[1] = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 - bsz1 + S * bsx[1] * S ) . / Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) . / Gamma Unscale ux[1] = Uti * usx[1] * Uti' 3. Compute usz0, usz1 r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T """ # z0 := U' * z0 * U # = bsz0 __cngrnc(U, z, trans='T') # z1 := Us' * bz1 * Us # = S^-1 * U' * bz1 * U * S^-1 # = S^-1 * bsz1 * S^-1 __cngrnc(Us, z, trans='T', offsetx=msq) # x[1] := Uti' * x[1] * Uti # = bsx[1] __cngrnc(Uti, x[1], trans='T') # x[1] := x[1] - z[msq:] # = bsx[1] - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], alpha=-1.0, offsetx=msq) # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG # = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG # = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG # in packed storage blas.copy(x[1], x1) blas.tbmv(S, x1, n=msq, k=0, ldA=1) blas.axpy(z, x1, n=msq) blas.tbsv(sqrtG, x1, n=msq, k=0, ldA=1) misc.pack2(x1, {'l': 0, 'q': [], 's': [m]}) # x[0] := x[0] + Asc'*x1 # = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) # = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma ) blas.gemv(Asc, x1, x[0], m=mpckd, trans='T', beta=1.0) # x[0] := H^-1 * x[0] # = ux[0] lapack.potrs(H, x[0]) # x1 = Asc(x[0]) .* sqrtG (unpacked) # = As(x[0]) blas.gemv(Asc, x[0], tmp, m=mpckd) misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]}) blas.tbmv(sqrtG, x1, n=msq, k=0, ldA=1) # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2 # = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1) # ./ Gamma # x[1] := x[1] - z[:msq] # = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], -1.0, n=msq) # x[1] := x[1] + x1 # = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(x1, x[1]) # x[1] := x[1] / Gammma # = (As(ux) + bsx[1] - bsz0 + S^-1 * bsz1 * S^-1 ) / Gamma # = S^-1 * usx[1] * S^-1 blas.tbsv(Gamma, x[1], n=msq, k=0, ldA=1) # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1 # := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 * U * r1 # := -r1' * uz1 * r1 blas.axpy(x[1], z, n=msq, offsety=msq) blas.scal(-1.0, z, offset=msq) __cngrnc(U, z, offsetx=msq) __cngrnc(W['r'][1], z, trans='T', offsetx=msq) # x[1] := S * x[1] * S # = usx1 blas.tbmv(S, x[1], n=msq, k=0, ldA=1) # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0 # = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0 # = r0' * U' * usz0 * U * r0 # = r0' * uz0 * r0 blas.axpy(x1, z, -1.0, n=msq) blas.scal(-1.0, z, n=msq) blas.axpy(x[1], z, -1.0, n=msq) __cngrnc(U, z) __cngrnc(W['r'][0], z, trans='T') # x[1] := Uti * x[1] * Uti' # = ux[1] __cngrnc(Uti, x[1])
def F(W): """ Create a solver for the linear equations C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - Dl^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - r*r' * [ ] * r*r' = bzs [ -A(ux) -uX2 ] [ uzs21 uzs22 ] where Dl = diag(W['l']), r = W['r'][0]. On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ]. 1. Compute matrices V1, V2 such that (with T = r*r') [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and S = [ diag(s); 0 ], s a positive q-vector. 2. Factor the mapping X -> X + S * X' * S: X + S * X' * S = L( L'( X )). 3. Compute scaled mappings: a matrix As with as its columns the coefficients of the scaled mapping L^-1( V2' * A() * V1' ) and the matrix Gs = Dl^-1 * G. 4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As. """ # 1. Compute V1, V2, s. r = W['r'][0] # LQ factorization R[:q, :] = L1 * Q1. lapack.lacpy(r, Q1, m = q) lapack.gelqf(Q1, tau1) lapack.lacpy(Q1, L1, n = q, uplo = 'L') lapack.orglq(Q1, tau1) # LQ factorization R[q:, :] = L2 * Q2. lapack.lacpy(r, Q2, m = p, offsetA = q) lapack.gelqf(Q2, tau2) lapack.lacpy(Q2, L2, n = p, uplo = 'L') lapack.orglq(Q2, tau2) # V2, V1, s are computed from an SVD: if # # Q2 * Q1' = U * diag(s) * V', # # then V1 = V' * L1^-1 and V2 = L2^-T * U. # T21 = Q2 * Q1.T blas.gemm(Q2, Q1, T21, transB = 'T') # SVD T21 = U * diag(s) * V'. Store U in V2 and V' in V1. lapack.gesvd(T21, s, jobu = 'A', jobvt = 'A', U = V2, Vt = V1) # # Q2 := Q2 * Q1' without extracting Q1; store T21 in Q2 # this will requires lapack.ormlq or lapack.unmlq # V2 = L2^-T * U blas.trsm(L2, V2, transA = 'T') # V1 = V' * L1^-1 blas.trsm(L1, V1, side = 'R') # 2. Factorization X + S * X' * S = L( L'( X )). # # The factor L is stored as a diagonal matrix D and a sparse lower # triangular matrix P, such that # # L(X)[:] = D**-1 * (I + P) * X[:] # L^-1(X)[:] = D * (I - P) * X[:]. # SS is q x q with SS[i,j] = si*sj. blas.scal(0.0, SS) blas.syr(s, SS) # For a p x q matrix X, P*X[:] is Y[:] where # # Yij = si * sj * Xji if i < j # = 0 otherwise. # P.V = SS[Itril2] # For a p x q matrix X, D*X[:] is Y[:] where # # Yij = Xij / sqrt( 1 - si^2 * sj^2 ) if i < j # = Xii / sqrt( 1 + si^2 ) if i = j # = Xij otherwise. # DV[Idiag] = sqrt(1.0 + SS[::q+1]) DV[Itriu] = sqrt(1.0 - SS[Itril3]**2) D.V = DV**-1 # 3. Scaled linear mappings # Ask := V2' * Ask * V1' blas.scal(0.0, As) base.axpy(A, As) for i in xrange(n): # tmp := V2' * As[i, :] blas.gemm(V2, As, tmp, transA = 'T', m = p, n = q, k = p, ldB = p, offsetB = i*p*q) # As[:,i] := tmp * V1' blas.gemm(tmp, V1, As, transB = 'T', m = p, n = q, k = q, ldC = p, offsetC = i*p*q) # As := D * (I - P) * As # = L^-1 * As. blas.copy(As, As2) base.gemm(P, As, As2, alpha = -1.0, beta = 1.0) base.gemm(D, As2, As) # Gs := Dl^-1 * G blas.scal(0.0, Gs) base.axpy(G, Gs) for k in xrange(n): blas.tbmv(W['di'], Gs, n = m, k = 0, ldA = 1, offsetx = k*m) # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As. blas.syrk(As, H, trans = 'T', alpha = 2.0) blas.syrk(Gs, H, trans = 'T', beta = 1.0) base.axpy(C, H) lapack.potrf(H) def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q, offsetA = m) lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q) lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q, offsetA = m + (p+q+1)*q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset = m) lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q, offsetB = m) lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q, offsetB = m + (p+q+1)*q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q, ldC = p+q, offsetC = m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q, ldC = p+q, offsetB = q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q, beta = -1.0, ldA = p+q, ldC = p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p, ldB = p) blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q, ldC = p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). # x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0) blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0) blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p, offsetA = q, offsetC = m, ldB = p+q, ldC = p+q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0, offsetA = q, offsetB = q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc = p+q+1, offset = m) return f
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q, offsetA = m) lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q) lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q, offsetA = m + (p+q+1)*q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset = m) lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q, offsetB = m) lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q, offsetB = m + (p+q+1)*q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q, ldC = p+q, offsetC = m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q, ldC = p+q, offsetB = q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q, beta = -1.0, ldA = p+q, ldC = p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p, ldB = p) blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q, ldC = p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). # x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0) blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0) blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p, offsetA = q, offsetC = m, ldB = p+q, ldC = p+q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0, offsetA = q, offsetB = q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc = p+q+1, offset = m)
def F(W): """ Create a solver for the linear equations C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - Dl^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - r*r' * [ ] * r*r' = bzs [ -A(ux) -uX2 ] [ uzs21 uzs22 ] where Dl = diag(W['l']), r = W['r'][0]. On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ]. 1. Compute matrices V1, V2 such that (with T = r*r') [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and S = [ diag(s); 0 ], s a positive q-vector. 2. Factor the mapping X -> X + S * X' * S: X + S * X' * S = L( L'( X )). 3. Compute scaled mappings: a matrix As with as its columns the coefficients of the scaled mapping L^-1( V2' * A() * V1' ) and the matrix Gs = Dl^-1 * G. 4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As. """ # 1. Compute V1, V2, s. r = W['r'][0] # LQ factorization R[:q, :] = L1 * Q1. lapack.lacpy(r, Q1, m=q) lapack.gelqf(Q1, tau1) lapack.lacpy(Q1, L1, n=q, uplo='L') lapack.orglq(Q1, tau1) # LQ factorization R[q:, :] = L2 * Q2. lapack.lacpy(r, Q2, m=p, offsetA=q) lapack.gelqf(Q2, tau2) lapack.lacpy(Q2, L2, n=p, uplo='L') lapack.orglq(Q2, tau2) # V2, V1, s are computed from an SVD: if # # Q2 * Q1' = U * diag(s) * V', # # then V1 = V' * L1^-1 and V2 = L2^-T * U. # T21 = Q2 * Q1.T blas.gemm(Q2, Q1, T21, transB='T') # SVD T21 = U * diag(s) * V'. Store U in V2 and V' in V1. lapack.gesvd(T21, s, jobu='A', jobvt='A', U=V2, Vt=V1) # # Q2 := Q2 * Q1' without extracting Q1; store T21 in Q2 # this will requires lapack.ormlq or lapack.unmlq # V2 = L2^-T * U blas.trsm(L2, V2, transA='T') # V1 = V' * L1^-1 blas.trsm(L1, V1, side='R') # 2. Factorization X + S * X' * S = L( L'( X )). # # The factor L is stored as a diagonal matrix D and a sparse lower # triangular matrix P, such that # # L(X)[:] = D**-1 * (I + P) * X[:] # L^-1(X)[:] = D * (I - P) * X[:]. # SS is q x q with SS[i,j] = si*sj. blas.scal(0.0, SS) blas.syr(s, SS) # For a p x q matrix X, P*X[:] is Y[:] where # # Yij = si * sj * Xji if i < j # = 0 otherwise. # P.V = SS[Itril2] # For a p x q matrix X, D*X[:] is Y[:] where # # Yij = Xij / sqrt( 1 - si^2 * sj^2 ) if i < j # = Xii / sqrt( 1 + si^2 ) if i = j # = Xij otherwise. # DV[Idiag] = sqrt(1.0 + SS[::q + 1]) DV[Itriu] = sqrt(1.0 - SS[Itril3]**2) D.V = DV**-1 # 3. Scaled linear mappings # Ask := V2' * Ask * V1' blas.scal(0.0, As) base.axpy(A, As) for i in xrange(n): # tmp := V2' * As[i, :] blas.gemm(V2, As, tmp, transA='T', m=p, n=q, k=p, ldB=p, offsetB=i * p * q) # As[:,i] := tmp * V1' blas.gemm(tmp, V1, As, transB='T', m=p, n=q, k=q, ldC=p, offsetC=i * p * q) # As := D * (I - P) * As # = L^-1 * As. blas.copy(As, As2) base.gemm(P, As, As2, alpha=-1.0, beta=1.0) base.gemm(D, As2, As) # Gs := Dl^-1 * G blas.scal(0.0, Gs) base.axpy(G, Gs) for k in xrange(n): blas.tbmv(W['di'], Gs, n=m, k=0, ldA=1, offsetx=k * m) # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As. blas.syrk(As, H, trans='T', alpha=2.0) blas.syrk(Gs, H, trans='T', beta=1.0) base.axpy(C, H) lapack.potrf(H) def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m) lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q) lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p + q, offsetA=m + (p + q + 1) * q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n=m, k=0, ldA=1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset=m) lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m) lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p + q, offsetB=m + (p + q + 1) * q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side='L', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans='T', n=p + q, k=p + q, ldB=p + q, ldC=p + q, offsetC=m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side='R', m=p, n=p + q, ldA=p + q, ldC=p + q, offsetB=q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p + q, beta=-1.0, ldA=p + q, ldC=p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p) blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). # x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0) blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0) blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p + q, k=p, offsetA=q, offsetC=m, ldB=p + q, ldC=p + q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side='R', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q, offsetB=q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc=p + q + 1, offset=m) return f
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p + q, offsetA=m) lapack.lacpy(z, bz21, m=p, n=q, ldA=p + q, offsetA=m + q) lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p + q, offsetA=m + (p + q + 1) * q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n=m, k=0, ldA=1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset=m) lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p + q, offsetB=m) lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p + q, offsetB=m + (p + q + 1) * q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side='L', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans='T', n=p + q, k=p + q, ldB=p + q, ldC=p + q, offsetC=m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side='R', m=p, n=p + q, ldA=p + q, ldC=p + q, offsetB=q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p + q, beta=-1.0, ldA=p + q, ldC=p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p) blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). # x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0) blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0) blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m=p, n=p + q, k=q, ldA=p, ldC=p + q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p + q, k=p, offsetA=q, offsetC=m, ldB=p + q, ldC=p + q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc=p + q + 1, offset=m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side='R', m=p + q, n=p + q, ldA=p + q, ldB=p + q, offsetA=m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q, offsetB=q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc=p + q + 1, offset=m)
def f(x, y, z): """ 1. Compute uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 * ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * ( bxk + Dk^-2 *bzk ) ) 2. For k = 1, ..., m: uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * (-uy + bxk + Dk^-2 * bzk) 3. Solve for uz d .* uz = ( ux - mat(bz) ) ./ d. Return ux, uy, d .* uz. """ ### utime0, stime0 = cputime() ### # xk := Dk^2 * xk + zk # = Dk^2 * bxk + bzk blas.tbmv(dsq, x, n=N * m, k=0, ldA=1) blas.axpy(z, x) # y := -y + sum_k ( I - Dk^2 * X * Hk^-1 * X' ) * xk # = -y + x*ones - sum_k Dk^2 * X * Hk^-1 * X' * xk # y := -y + x*ones blas.gemv(x, ones, y, alpha=1.0, beta=-1.0) # wnm = X' * x (wnm interpreted as an n x m matrix) blas.gemm(X, x, wnm, m=n, k=N, n=m, transA='T', ldB=N, ldC=n) # wnm[:,k] = Hk \ wnm[:,k] (for wnm as an n x m matrix) for k in range(m): lapack.potrs(H[k], wnm, offsetB=k * n) for k in range(m): # wN = X * wnm[:,k] blas.gemv(X, wnm, wN, offsetx=n * k) # wN = Dk^2 * wN blas.tbmv(dsq[:, k], wN, n=N, k=0, ldA=1) # y := y - wN blas.axpy(wN, y, -1.0) # y = D^-1 * (I + Y * S^-1 * Y') * D^-1 * y # # Y = [Y1 ... Ym ], Yk = D^-1 * Dk^2 * X * Lk^-T. # y := D^-1 * y blas.tbsv(D, y, n=N, k=0, ldA=1) # wnm = Y' * y (interpreted as an Nm vector) # = [ L1^-1 * X' * D1^2 * D^-1 * y; # L2^-1 * X' * D2^2 * D^-1 * y; # ... # Lm^-1 * X' * Dm^2 * D^-1 * y ] for k in range(m): # wN = D^-1 * Dk^2 * y blas.copy(y, wN) blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N) blas.tbsv(D, wN, n=N, k=0, ldA=1) # wnm[:,k] = X' * wN blas.gemv(X, wN, wnm, trans='T', offsety=k * n) # wnm[:,k] = Lk^-1 * wnm[:,k] blas.trsv(H[k], wnm, offsetx=k * n) # wnm := S^-1 * wnm (an mn-vector) lapack.potrs(S, wnm) # y := y + Y * wnm # = y + D^-1 * [ D1^2 * X * L1^-T ... D2^k * X * Lk^-T] # * wnm for k in range(m): # wnm[:,k] = Lk^-T * wnm[:,k] blas.trsv(H[k], wnm, trans='T', offsetx=k * n) # wN = X * wnm[:,k] blas.gemv(X, wnm, wN, offsetx=k * n) # wN = D^-1 * Dk^2 * wN blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N) blas.tbsv(D, wN, n=N, k=0, ldA=1) # y += wN blas.axpy(wN, y) # y := D^-1 * y blas.tbsv(D, y, n=N, k=0, ldA=1) # For k = 1, ..., m: # # xk = (I - Dk^2 * X * Hk^-1 * X') * (-Dk^2 * y + xk) # x = x - [ D1^2 * y ... Dm^2 * y] (as an N x m matrix) for k in range(m): blas.copy(y, wN) blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N) blas.axpy(wN, x, -1.0, offsety=k * N) # wnm = X' * x (as an n x m matrix) blas.gemm(X, x, wnm, transA='T', m=n, n=m, k=N, ldB=N, ldC=n) # wnm[:,k] = Hk^-1 * wnm[:,k] for k in range(m): lapack.potrs(H[k], wnm, offsetB=n * k) for k in range(m): # wN = X * wnm[:,k] blas.gemv(X, wnm, wN, offsetx=k * n) # wN = Dk^2 * wN blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N) # x[:,k] := x[:,k] - wN blas.axpy(wN, x, -1.0, n=N, offsety=k * N) # z := ( x - z ) ./ d blas.axpy(x, z, -1.0) blas.scal(-1.0, z) blas.tbsv(d, z, n=N * m, k=0, ldA=1) ### utime, stime = cputime() print("Solve: utime = %.2f, stime = %.2f" \ %(utime-utime0, stime-stime0))
def scale(x, W, trans='N', inverse='N'): """ Applies Nesterov-Todd scaling or its inverse. Computes x := W*x (trans is 'N', inverse = 'N') x := W^T*x (trans is 'T', inverse = 'N') x := W^{-1}*x (trans is 'N', inverse = 'I') x := W^{-T}*x (trans is 'T', inverse = 'I'). x is a dense 'd' matrix. W is a dictionary with entries: - W['dnl']: positive vector - W['dnli']: componentwise inverse of W['dnl'] - W['d']: positive vector - W['di']: componentwise inverse of W['d'] - W['v']: lists of 2nd order cone vectors with unit hyperbolic norms - W['beta']: list of positive numbers - W['r']: list of square matrices - W['rti']: list of square matrices. rti[k] is the inverse transpose of r[k]. The 'dnl' and 'dnli' entries are optional, and only present when the function is called from the nonlinear solver. """ from cvxopt import blas ind = 0 # Scaling for nonlinear component xk is xk := dnl .* xk; inverse # scaling is xk ./ dnl = dnli .* xk, where dnl = W['dnl'], # dnli = W['dnli']. if 'dnl' in W: if inverse == 'N': w = W['dnl'] else: w = W['dnli'] for k in range(x.size[1]): blas.tbmv(w, x, n=w.size[0], k=0, ldA=1, offsetx=k * x.size[0]) ind += w.size[0] # Scaling for linear 'l' component xk is xk := d .* xk; inverse # scaling is xk ./ d = di .* xk, where d = W['d'], di = W['di']. if inverse == 'N': w = W['d'] else: w = W['di'] for k in range(x.size[1]): blas.tbmv(w, x, n=w.size[0], k=0, ldA=1, offsetx=k * x.size[0] + ind) ind += w.size[0] # Scaling for 'q' component is # # xk := beta * (2*v*v' - J) * xk # = beta * (2*v*(xk'*v)' - J*xk) # # where beta = W['beta'][k], v = W['v'][k], J = [1, 0; 0, -I]. # # Inverse scaling is # # xk := 1/beta * (2*J*v*v'*J - J) * xk # = 1/beta * (-J) * (2*v*((-J*xk)'*v)' + xk). w = matrix(0.0, (x.size[1], 1)) for k in range(len(W['v'])): v = W['v'][k] m = v.size[0] if inverse == 'I': blas.scal(-1.0, x, offset=ind, inc=x.size[0]) blas.gemv(x, v, w, trans='T', m=m, n=x.size[1], offsetA=ind, ldA=x.size[0]) blas.scal(-1.0, x, offset=ind, inc=x.size[0]) blas.ger(v, w, x, alpha=2.0, m=m, n=x.size[1], ldA=x.size[0], offsetA=ind) if inverse == 'I': blas.scal(-1.0, x, offset=ind, inc=x.size[0]) a = 1.0 / W['beta'][k] else: a = W['beta'][k] for i in range(x.size[1]): blas.scal(a, x, n=m, offset=ind + i * x.size[0]) ind += m # Scaling for 's' component xk is # # xk := vec( r' * mat(xk) * r ) if trans = 'N' # xk := vec( r * mat(xk) * r' ) if trans = 'T'. # # r is kth element of W['r']. # # Inverse scaling is # # xk := vec( rti * mat(xk) * rti' ) if trans = 'N' # xk := vec( rti' * mat(xk) * rti ) if trans = 'T'. # # rti is kth element of W['rti']. maxn = max([0] + [r.size[0] for r in W['r']]) a = matrix(0.0, (maxn, maxn)) for k in range(len(W['r'])): if inverse == 'N': r = W['r'][k] if trans == 'N': t = 'T' else: t = 'N' else: r = W['rti'][k] t = trans n = r.size[0] for i in range(x.size[1]): # scale diagonal of xk by 0.5 blas.scal(0.5, x, offset=ind + i * x.size[0], inc=n + 1, n=n) # a = r*tril(x) (t is 'N') or a = tril(x)*r (t is 'T') blas.copy(r, a) if t == 'N': blas.trmm(x, a, side='R', m=n, n=n, ldA=n, ldB=n, offsetA=ind + i * x.size[0]) else: blas.trmm(x, a, side='L', m=n, n=n, ldA=n, ldB=n, offsetA=ind + i * x.size[0]) # x := (r*a' + a*r') if t is 'N' # x := (r'*a + a'*r) if t is 'T' blas.syr2k(r, a, x, trans=t, n=n, k=n, ldB=n, ldC=n, offsetC=ind + i * x.size[0]) ind += n ** 2
def solve(x, y, z): """ 1. Solve for usx[0]: Asc'(Asc(usx[0])) = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG) where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U, bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1 2. Solve for usx[1]: usx[1] + S * usx[1] * S = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 usx[1] = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 - bsz1 + S * bsx[1] * S ) . / Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) . / Gamma Unscale ux[1] = Uti * usx[1] * Uti' 3. Compute usz0, usz1 r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T """ # z0 := U' * z0 * U # = bsz0 __cngrnc(U, z, trans = 'T') # z1 := Us' * bz1 * Us # = S^-1 * U' * bz1 * U * S^-1 # = S^-1 * bsz1 * S^-1 __cngrnc(Us, z, trans = 'T', offsetx = msq) # x[1] := Uti' * x[1] * Uti # = bsx[1] __cngrnc(Uti, x[1], trans = 'T') # x[1] := x[1] - z[msq:] # = bsx[1] - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], alpha = -1.0, offsetx = msq) # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG # = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG # = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG # in packed storage blas.copy(x[1], x1) blas.tbmv(S, x1, n = msq, k = 0, ldA = 1) blas.axpy(z, x1, n = msq) blas.tbsv(sqrtG, x1, n = msq, k = 0, ldA = 1) misc.pack2(x1, {'l': 0, 'q': [], 's': [m]}) # x[0] := x[0] + Asc'*x1 # = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) # = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma ) blas.gemv(Asc, x1, x[0], m = mpckd, trans = 'T', beta = 1.0) # x[0] := H^-1 * x[0] # = ux[0] lapack.potrs(H, x[0]) # x1 = Asc(x[0]) .* sqrtG (unpacked) # = As(x[0]) blas.gemv(Asc, x[0], tmp, m = mpckd) misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]}) blas.tbmv(sqrtG, x1, n = msq, k = 0, ldA = 1) # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2 # = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1) # ./ Gamma # x[1] := x[1] - z[:msq] # = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], -1.0, n = msq) # x[1] := x[1] + x1 # = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(x1, x[1]) # x[1] := x[1] / Gammma # = (As(ux) + bsx[1] - bsz0 + S^-1 * bsz1 * S^-1 ) / Gamma # = S^-1 * usx[1] * S^-1 blas.tbsv(Gamma, x[1], n = msq, k = 0, ldA = 1) # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1 # := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 * U * r1 # := -r1' * uz1 * r1 blas.axpy(x[1], z, n = msq, offsety = msq) blas.scal(-1.0, z, offset = msq) __cngrnc(U, z, offsetx = msq) __cngrnc(W['r'][1], z, trans = 'T', offsetx = msq) # x[1] := S * x[1] * S # = usx1 blas.tbmv(S, x[1], n = msq, k = 0, ldA = 1) # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0 # = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0 # = r0' * U' * usz0 * U * r0 # = r0' * uz0 * r0 blas.axpy(x1, z, -1.0, n = msq) blas.scal(-1.0, z, n = msq) blas.axpy(x[1], z, -1.0, n = msq) __cngrnc(U, z) __cngrnc(W['r'][0], z, trans = 'T') # x[1] := Uti * x[1] * Uti' # = ux[1] __cngrnc(Uti, x[1])
def kkt(W): """ KKT solver for X*X' * ux + uy * 1_m' + mat(uz) = bx ux * 1_m = by ux - d.^2 .* mat(uz) = mat(bz). ux and bx are N x m matrices. uy and by are N-vectors. uz and bz are N*m-vectors. mat(uz) is the N x m matrix that satisfies mat(uz)[:] = uz. d = mat(W['d']) a positive N x m matrix. If we eliminate uz from the last equation using mat(uz) = (ux - mat(bz)) ./ d.^2 we get two equations in ux, uy: X*X' * ux + ux ./ d.^2 + uy * 1_m' = bx + mat(bz) ./ d.^2 ux * 1_m = by. From the 1st equation, uxk = (X*X' + Dk^-2)^-1 * (-uy + bxk + Dk^-2 * bzk) = Dk * (I + Xk*Xk')^-1 * Dk * (-uy + bxk + Dk^-2 * bzk) for k = 1, ..., m, where Dk = diag(d[:,k]), Xk = Dk * X, uxk is column k of ux, and bzk is column k of mat(bz). We use the matrix inversion lemma ( I + Xk * Xk' )^-1 = I - Xk * (I + Xk' * Xk)^-1 * Xk' = I - Xk * Hk^-1 * Xk' = I - Xk * Lk^-T * Lk^-1 * Xk' where Hk = I + Xk' * Xk = Lk * Lk' to write this as uxk = Dk * (I - Xk * Hk^-1 * Xk') * Dk * (-uy + bxk + Dk^-2 * bzk) = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * (-uy + bxk + Dk^-2 * bzk). Substituting this in the second equation gives an equation for uy: sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2 ) * uy = -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * ( bxk + Dk^-2 * bzk ), i.e., with D = (sum_k Dk^2)^1/2, Yk = D^-1 * Dk^2 * X * Lk^-T, D * ( I - sum_k Yk * Yk' ) * D * uy = -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * ( bxk + Dk^-2 *bzk ). Another application of the matrix inversion lemma gives uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 * ( -by + sum_k ( Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2 ) * ( bxk + Dk^-2 *bzk ) ) with S = I - Y' * Y, Y = [ Y1 ... Ym ]. Summary: 1. Compute uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 * ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * ( bxk + Dk^-2 *bzk ) ) 2. For k = 1, ..., m: uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * (-uy + bxk + Dk^-2 * bzk) 3. Solve for uz d .* uz = ( ux - mat(bz) ) ./ d. Return ux, uy, d .* uz. """ ### utime0, stime0 = cputime() ### d = matrix(W['d'], (N, m)) dsq = matrix(W['d']**2, (N, m)) # Factor the matrices # # H[k] = I + Xk' * Xk # = I + X' * Dk^2 * X. # # Dk = diag(d[:,k]). for k in range(m): # H[k] = I blas.scal(0.0, H[k]) H[k][::n + 1] = 1.0 # Xs = Dk * X # = diag(d[:,k]]) * X blas.copy(X, Xs) for j in range(n): blas.tbmv(d, Xs, n=N, k=0, ldA=1, offsetA=k * N, offsetx=j * N) # H[k] := H[k] + Xs' * Xs # = I + Xk' * Xk blas.syrk(Xs, H[k], trans='T', beta=1.0) # Factorization H[k] = Lk * Lk' lapack.potrf(H[k]) ### utime, stime = cputime() print("Factor Hk's: utime = %.2f, stime = %.2f" \ %(utime-utime0, stime-stime0)) utime0, stime0 = cputime() ### # diag(D) = ( sum_k d[:,k]**2 ) ** 1/2 # = ( sum_k Dk^2) ** 1/2. blas.gemv(dsq, ones, D) D[:] = sqrt(D) ### # utime, stime = cputime() # print("Compute D: utime = %.2f, stime = %.2f" \ # %(utime-utime0, stime-stime0)) utime0, stime0 = cputime() ### # S = I - Y'* Y is an m x m block matrix. # The i,j block of Y' * Y is # # Yi' * Yj = Li^-1 * X' * Di^2 * D^-1 * Dj^2 * X * Lj^-T. # # We compute only the lower triangular blocks in Y'*Y. blas.scal(0.0, S) for i in range(m): for j in range(i + 1): # Xs = Di * Dj * D^-1 * X blas.copy(X, Xs) blas.copy(d, wN, n=N, offsetx=i * N) blas.tbmv(d, wN, n=N, k=0, ldA=1, offsetA=j * N) blas.tbsv(D, wN, n=N, k=0, ldA=1) for k in range(n): blas.tbmv(wN, Xs, n=N, k=0, ldA=1, offsetx=k * N) # block i, j of S is Xs' * Xs (as nonsymmetric matrix so we # get the correct multiple after scaling with Li, Lj) blas.gemm(Xs, Xs, S, transA='T', ldC=m * n, offsetC=(j * n) * m * n + i * n) ### utime, stime = cputime() print("Form S: utime = %.2f, stime = %.2f" \ %(utime-utime0, stime-stime0)) utime0, stime0 = cputime() ### for i in range(m): # multiply block row i of S on the left with Li^-1 blas.trsm(H[i], S, m=n, n=(i + 1) * n, ldB=m * n, offsetB=i * n) # multiply block column i of S on the right with Li^-T blas.trsm(H[i], S, side='R', transA='T', m=(m - i) * n, n=n, ldB=m * n, offsetB=i * n * (m * n + 1)) blas.scal(-1.0, S) S[::(m * n + 1)] += 1.0 ### utime, stime = cputime() print("Form S (2): utime = %.2f, stime = %.2f" \ %(utime-utime0, stime-stime0)) utime0, stime0 = cputime() ### # S = L*L' lapack.potrf(S) ### utime, stime = cputime() print("Factor S: utime = %.2f, stime = %.2f" \ %(utime-utime0, stime-stime0)) utime0, stime0 = cputime() ### def f(x, y, z): """ 1. Compute uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 * ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * ( bxk + Dk^-2 *bzk ) ) 2. For k = 1, ..., m: uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) * (-uy + bxk + Dk^-2 * bzk) 3. Solve for uz d .* uz = ( ux - mat(bz) ) ./ d. Return ux, uy, d .* uz. """ ### utime0, stime0 = cputime() ### # xk := Dk^2 * xk + zk # = Dk^2 * bxk + bzk blas.tbmv(dsq, x, n=N * m, k=0, ldA=1) blas.axpy(z, x) # y := -y + sum_k ( I - Dk^2 * X * Hk^-1 * X' ) * xk # = -y + x*ones - sum_k Dk^2 * X * Hk^-1 * X' * xk # y := -y + x*ones blas.gemv(x, ones, y, alpha=1.0, beta=-1.0) # wnm = X' * x (wnm interpreted as an n x m matrix) blas.gemm(X, x, wnm, m=n, k=N, n=m, transA='T', ldB=N, ldC=n) # wnm[:,k] = Hk \ wnm[:,k] (for wnm as an n x m matrix) for k in range(m): lapack.potrs(H[k], wnm, offsetB=k * n) for k in range(m): # wN = X * wnm[:,k] blas.gemv(X, wnm, wN, offsetx=n * k) # wN = Dk^2 * wN blas.tbmv(dsq[:, k], wN, n=N, k=0, ldA=1) # y := y - wN blas.axpy(wN, y, -1.0) # y = D^-1 * (I + Y * S^-1 * Y') * D^-1 * y # # Y = [Y1 ... Ym ], Yk = D^-1 * Dk^2 * X * Lk^-T. # y := D^-1 * y blas.tbsv(D, y, n=N, k=0, ldA=1) # wnm = Y' * y (interpreted as an Nm vector) # = [ L1^-1 * X' * D1^2 * D^-1 * y; # L2^-1 * X' * D2^2 * D^-1 * y; # ... # Lm^-1 * X' * Dm^2 * D^-1 * y ] for k in range(m): # wN = D^-1 * Dk^2 * y blas.copy(y, wN) blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N) blas.tbsv(D, wN, n=N, k=0, ldA=1) # wnm[:,k] = X' * wN blas.gemv(X, wN, wnm, trans='T', offsety=k * n) # wnm[:,k] = Lk^-1 * wnm[:,k] blas.trsv(H[k], wnm, offsetx=k * n) # wnm := S^-1 * wnm (an mn-vector) lapack.potrs(S, wnm) # y := y + Y * wnm # = y + D^-1 * [ D1^2 * X * L1^-T ... D2^k * X * Lk^-T] # * wnm for k in range(m): # wnm[:,k] = Lk^-T * wnm[:,k] blas.trsv(H[k], wnm, trans='T', offsetx=k * n) # wN = X * wnm[:,k] blas.gemv(X, wnm, wN, offsetx=k * n) # wN = D^-1 * Dk^2 * wN blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N) blas.tbsv(D, wN, n=N, k=0, ldA=1) # y += wN blas.axpy(wN, y) # y := D^-1 * y blas.tbsv(D, y, n=N, k=0, ldA=1) # For k = 1, ..., m: # # xk = (I - Dk^2 * X * Hk^-1 * X') * (-Dk^2 * y + xk) # x = x - [ D1^2 * y ... Dm^2 * y] (as an N x m matrix) for k in range(m): blas.copy(y, wN) blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N) blas.axpy(wN, x, -1.0, offsety=k * N) # wnm = X' * x (as an n x m matrix) blas.gemm(X, x, wnm, transA='T', m=n, n=m, k=N, ldB=N, ldC=n) # wnm[:,k] = Hk^-1 * wnm[:,k] for k in range(m): lapack.potrs(H[k], wnm, offsetB=n * k) for k in range(m): # wN = X * wnm[:,k] blas.gemv(X, wnm, wN, offsetx=k * n) # wN = Dk^2 * wN blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N) # x[:,k] := x[:,k] - wN blas.axpy(wN, x, -1.0, n=N, offsety=k * N) # z := ( x - z ) ./ d blas.axpy(x, z, -1.0) blas.scal(-1.0, z) blas.tbsv(d, z, n=N * m, k=0, ldA=1) ### utime, stime = cputime() print("Solve: utime = %.2f, stime = %.2f" \ %(utime-utime0, stime-stime0)) ### return f
def solve(x, y, z): """ Returns solution of rho * ux + A'(uy) - r^-T * uz * r^-1 = bx A(ux) = by -ux - r * uz * r' = bz. On entry, x = bx, y = by, z = bz. On exit, x = ux, y = uy, z = uz. """ # bz is a copy of z in the format of x blas.copy(z, bz) blas.axpy(bz, x, alpha=rho) # x := Gamma .* (u' * x * u) # = Gamma .* (u' * (bx + rho * bz) * u) cngrnc(U, x, trans='T', offsetx=0) blas.tbmv(Gamma, x, n=ns2, k=0, ldA=1, offsetx=0) # y := y - As(x) # := by - As( Gamma .* u' * (bx + rho * bz) * u) #blas.copy(x,xp) #pack_ip(xp,n = ns,m=1,nl=nl) misc.pack(x, xp, {'l': 0, 'q': [], 's': [ns]}) blas.gemv(Aspkd, xp, y, trans = 'T',alpha = -1.0, beta = 1.0, \ m = ns*(ns+1)/2, n = ms,offsetx = 0) # y := -y - A(bz) # = -by - A(bz) + As(Gamma .* (u' * (bx + rho * bz) * u) Af(bz, y, alpha=-1.0, beta=-1.0) # y := H^-1 * y # = H^-1 ( -by - A(bz) + As(Gamma.* u'*(bx + rho*bz)*u) ) # = uy blas.trsv(H, y) blas.trsv(H, y, trans='T') # bz = Vt' * vz * Vt # = uz where # vz := Gamma .* ( As'(uy) - x ) # = Gamma .* ( As'(uy) - Gamma .* (u'*(bx + rho *bz)*u) ) # = Gamma.^2 .* ( u' * (A'(uy) - bx - rho * bz) * u ). #blas.copy(x,xp) #pack_ip(xp,n=ns,m=1,nl=nl) misc.pack(x, xp, {'l': 0, 'q': [], 's': [ns]}) blas.scal(-1.0, xp) blas.gemv(Aspkd, y, xp, alpha=1.0, beta=1.0, m=ns * (ns + 1) / 2, n=ms, offsety=0) # bz[j] is xp unpacked and multiplied with Gamma misc.unpack(xp, bz, {'l': 0, 'q': [], 's': [ns]}) blas.tbmv(Gamma, bz, n=ns2, k=0, ldA=1, offsetx=0) # bz = Vt' * bz * Vt # = uz cngrnc(Vt, bz, trans='T', offsetx=0) symmetrize(bz, ns, offset=0) # x = -bz - r * uz * r' # z contains r.h.s. bz; copy to x blas.copy(z, x) blas.copy(bz, z) cngrnc(W['r'][0], bz, offsetx=0) blas.axpy(bz, x) blas.scal(-1.0, x)
def F(W): # SVD R[j] = U[j] * diag(sig[j]) * Vt[j] lapack.gesvd(+W['r'][0], sv, jobu='A', jobvt='A', U=U, Vt=Vt) # Vt[j] := diag(sig[j])^-1 * Vt[j] for k in xrange(ns): blas.tbsv(sv, Vt, n=ns, k=0, ldA=1, offsetx=k * ns) # Gamma[j] is an ns[j] x ns[j] symmetric matrix # # (sig[j] * sig[j]') ./ sqrt(1 + rho * (sig[j] * sig[j]').^2) # S = sig[j] * sig[j]' S = matrix(0.0, (ns, ns)) blas.syrk(sv, S) Gamma = div(S, sqrt(1.0 + rho * S**2)) symmetrize(Gamma, ns) # As represents the scaled mapping # # As(x) = A(u * (Gamma .* x) * u') # As'(y) = Gamma .* (u' * A'(y) * u) # # stored in a similar format as A, except that we use packed # storage for the columns of As[i][j]. if type(A) is spmatrix: blas.scal(0.0, As) try: As[VecAIndex] = +A['s'][VecAIndex] except: As[VecAIndex] = +A[VecAIndex] else: blas.copy(A, As) # As[i][j][:,k] = diag( diag(Gamma[j]))*As[i][j][:,k] # As[i][j][l,:] = Gamma[j][l,l]*As[i][j][l,:] for k in xrange(ms): cngrnc(U, As, trans='T', offsetx=k * (ns2)) blas.tbmv(Gamma, As, n=ns2, k=0, ldA=1, offsetx=k * (ns2)) misc.pack(As, Aspkd, {'l': 0, 'q': [], 's': [ns] * ms}) # H is an m times m block matrix with i, k block # # Hik = sum_j As[i,j]' * As[k,j] # # of size ms[i] x ms[k]. Hik = 0 if As[i,j] or As[k,j] # are zero for all j H = matrix(0.0, (ms, ms)) blas.syrk(Aspkd, H, trans='T', beta=1.0, k=ns * (ns + 1) / 2) lapack.potrf(H) def solve(x, y, z): """ Returns solution of rho * ux + A'(uy) - r^-T * uz * r^-1 = bx A(ux) = by -ux - r * uz * r' = bz. On entry, x = bx, y = by, z = bz. On exit, x = ux, y = uy, z = uz. """ # bz is a copy of z in the format of x blas.copy(z, bz) blas.axpy(bz, x, alpha=rho) # x := Gamma .* (u' * x * u) # = Gamma .* (u' * (bx + rho * bz) * u) cngrnc(U, x, trans='T', offsetx=0) blas.tbmv(Gamma, x, n=ns2, k=0, ldA=1, offsetx=0) # y := y - As(x) # := by - As( Gamma .* u' * (bx + rho * bz) * u) #blas.copy(x,xp) #pack_ip(xp,n = ns,m=1,nl=nl) misc.pack(x, xp, {'l': 0, 'q': [], 's': [ns]}) blas.gemv(Aspkd, xp, y, trans = 'T',alpha = -1.0, beta = 1.0, \ m = ns*(ns+1)/2, n = ms,offsetx = 0) # y := -y - A(bz) # = -by - A(bz) + As(Gamma .* (u' * (bx + rho * bz) * u) Af(bz, y, alpha=-1.0, beta=-1.0) # y := H^-1 * y # = H^-1 ( -by - A(bz) + As(Gamma.* u'*(bx + rho*bz)*u) ) # = uy blas.trsv(H, y) blas.trsv(H, y, trans='T') # bz = Vt' * vz * Vt # = uz where # vz := Gamma .* ( As'(uy) - x ) # = Gamma .* ( As'(uy) - Gamma .* (u'*(bx + rho *bz)*u) ) # = Gamma.^2 .* ( u' * (A'(uy) - bx - rho * bz) * u ). #blas.copy(x,xp) #pack_ip(xp,n=ns,m=1,nl=nl) misc.pack(x, xp, {'l': 0, 'q': [], 's': [ns]}) blas.scal(-1.0, xp) blas.gemv(Aspkd, y, xp, alpha=1.0, beta=1.0, m=ns * (ns + 1) / 2, n=ms, offsety=0) # bz[j] is xp unpacked and multiplied with Gamma misc.unpack(xp, bz, {'l': 0, 'q': [], 's': [ns]}) blas.tbmv(Gamma, bz, n=ns2, k=0, ldA=1, offsetx=0) # bz = Vt' * bz * Vt # = uz cngrnc(Vt, bz, trans='T', offsetx=0) symmetrize(bz, ns, offset=0) # x = -bz - r * uz * r' # z contains r.h.s. bz; copy to x blas.copy(z, x) blas.copy(bz, z) cngrnc(W['r'][0], bz, offsetx=0) blas.axpy(bz, x) blas.scal(-1.0, x) return solve
def solve(x, y, z): """ Returns solution of rho * ux + A'(uy) - r^-T * uz * r^-1 = bx A(ux) = by -ux - r * uz * r' = bz. On entry, x = bx, y = by, z = bz. On exit, x = ux, y = uy, z = uz. """ # bz is a copy of z in the format of x blas.copy(z, bz) blas.axpy(bz, x, alpha=rho, offsetx=nl, offsety=nl) # x := Gamma .* (u' * x * u) # = Gamma .* (u' * (bx + rho * bz) * u) cngrnc(U, x, trans='T', offsetx=nl) blas.tbmv(Gamma, x, n=ns2, k=0, ldA=1, offsetx=nl) blas.tbmv(+W['d'], x, n=nl, k=0, ldA=1) # y := y - As(x) # := by - As( Gamma .* u' * (bx + rho * bz) * u) misc.pack(x, xp, dims) blas.gemv(Aspkd, xp, y, trans = 'T',alpha = -1.0, beta = 1.0, \ m = ns*(ns+1)/2, n = ms,offsetx = nl) #y = y - mul(+W['d'][:nl/2],xp[:nl/2])+ mul(+W['d'][nl/2:nl],xp[nl/2:nl]) blas.tbmv(+W['d'], xp, n=nl, k=0, ldA=1) blas.axpy(xp, y, alpha=-1, n=ms) blas.axpy(xp, y, alpha=1, n=ms, offsetx=nl / 2) # y := -y - A(bz) # = -by - A(bz) + As(Gamma .* (u' * (bx + rho * bz) * u) Af(bz, y, alpha=-1.0, beta=-1.0) # y := H^-1 * y # = H^-1 ( -by - A(bz) + As(Gamma.* u'*(bx + rho*bz)*u) ) # = uy blas.trsv(H, y) blas.trsv(H, y, trans='T') # bz = Vt' * vz * Vt # = uz where # vz := Gamma .* ( As'(uy) - x ) # = Gamma .* ( As'(uy) - Gamma .* (u'*(bx + rho *bz)*u) ) # = Gamma.^2 .* ( u' * (A'(uy) - bx - rho * bz) * u ). misc.pack(x, xp, dims) blas.scal(-1.0, xp) blas.gemv(Aspkd, y, xp, alpha=1.0, beta=1.0, m=ns * (ns + 1) / 2, n=ms, offsety=nl) #xp[:nl/2] = xp[:nl/2] + mul(+W['d'][:nl/2],y) #xp[nl/2:nl] = xp[nl/2:nl] - mul(+W['d'][nl/2:nl],y) blas.copy(y, tmp) blas.tbmv(+W['d'], tmp, n=nl / 2, k=0, ldA=1) blas.axpy(tmp, xp, n=nl / 2) blas.copy(y, tmp) blas.tbmv(+W['d'], tmp, n=nl / 2, k=0, ldA=1, offsetA=nl / 2) blas.axpy(tmp, xp, alpha=-1, n=nl / 2, offsety=nl / 2) # bz[j] is xp unpacked and multiplied with Gamma blas.copy(xp, bz) #,n = nl) misc.unpack(xp, bz, dims) blas.tbmv(Gamma, bz, n=ns2, k=0, ldA=1, offsetx=nl) # bz = Vt' * bz * Vt # = uz cngrnc(Vt, bz, trans='T', offsetx=nl) symmetrize(bz, ns, offset=nl) # x = -bz - r * uz * r' # z contains r.h.s. bz; copy to x #so far, z = bzc (untouched) blas.copy(z, x) blas.copy(bz, z) cngrnc(W['r'][0], bz, offsetx=nl) blas.tbmv(W['d'], bz, n=nl, k=0, ldA=1) blas.axpy(bz, x) blas.scal(-1.0, x)
def solve(x, y, z): """ Returns solution of rho * ux + A'(uy) - r^-T * uz * r^-1 = bx A(ux) = by -ux - r * uz * r' = bz. On entry, x = bx, y = by, z = bz. On exit, x = ux, y = uy, z = uz. """ # bz is a copy of z in the format of x blas.copy(z, bz) # x := x + rho * bz # = bx + rho * bz blas.axpy(bz, x, alpha=rho) # x := Gamma .* (u' * x * u) # = Gamma .* (u' * (bx + rho * bz) * u) offsetj = 0 for j in xrange(N): cngrnc(U[j], x, trans='T', offsetx=offsetj, n=ns[j]) blas.tbmv(Gamma[j], x, n=ns[j]**2, k=0, ldA=1, offsetx=offsetj) offsetj += ns[j]**2 # y := y - As(x) # := by - As( Gamma .* u' * (bx + rho * bz) * u) blas.copy(x, xp) offsetj = 0 for j in xrange(N): misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]}) offsetj += ns[j]**2 offseti = 0 for i in xrange(M): offsetj = 0 for j in xrange(N): if type(As[i][j]) is matrix: blas.gemv(As[i][j], xp, y, trans='T', alpha=-1.0, beta=1.0, m=ns[j] * (ns[j] + 1) / 2, n=ms[i], ldA=ns[j]**2, offsetx=offsetj, offsety=offseti) offsetj += ns[j]**2 offseti += ms[i] # y := -y - A(bz) # = -by - A(bz) + As(Gamma .* (u' * (bx + rho * bz) * u) Af(bz, y, alpha=-1.0, beta=-1.0) # y := H^-1 * y # = H^-1 ( -by - A(bz) + As(Gamma.* u'*(bx + rho*bz)*u) ) # = uy cholmod.solve(HF, y) # bz = Vt' * vz * Vt # = uz where # vz := Gamma .* ( As'(uy) - x ) # = Gamma .* ( As'(uy) - Gamma .* (u'*(bx + rho *bz)*u) ) # = Gamma.^2 .* ( u' * (A'(uy) - bx - rho * bz) * u ). blas.copy(x, xp) offsetj = 0 for j in xrange(N): # xp is -x[j] = -Gamma .* (u' * (bx + rho*bz) * u) # in packed storage misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]}) offsetj += ns[j]**2 blas.scal(-1.0, xp) offsetj = 0 for j in xrange(N): # xp += As'(uy) offseti = 0 for i in xrange(M): if type(As[i][j]) is matrix: blas.gemv(As[i][j], y, xp, alpha = 1.0, beta = 1.0, m = ns[j]*(ns[j]+1)/2, \ n = ms[i],ldA = ns[j]**2, \ offsetx = offseti, offsety = offsetj) offseti += ms[i] # bz[j] is xp unpacked and multiplied with Gamma #unpack(xp, bz[j], ns[j]) misc.unpack(xp, bz, { 'l': 0, 'q': [], 's': [ns[j]] }, offsetx=offsetj, offsety=offsetj) blas.tbmv(Gamma[j], bz, n=ns[j]**2, k=0, ldA=1, offsetx=offsetj) # bz = Vt' * bz * Vt # = uz cngrnc(Vt[j], bz, trans='T', offsetx=offsetj, n=ns[j]) symmetrize(bz, ns[j], offset=offsetj) offsetj += ns[j]**2 # x = -bz - r * uz * r' blas.copy(z, x) blas.copy(bz, z) offsetj = 0 for j in xrange(N): cngrnc(+W['r'][j], bz, offsetx=offsetj, n=ns[j]) offsetj += ns[j]**2 blas.axpy(bz, x) blas.scal(-1.0, x)
def F(W): for j in xrange(N): # SVD R[j] = U[j] * diag(sig[j]) * Vt[j] lapack.gesvd(+W['r'][j], sv[j], jobu='A', jobvt='A', U=U[j], Vt=Vt[j]) # Vt[j] := diag(sig[j])^-1 * Vt[j] for k in xrange(ns[j]): blas.tbsv(sv[j], Vt[j], n=ns[j], k=0, ldA=1, offsetx=k * ns[j]) # Gamma[j] is an ns[j] x ns[j] symmetric matrix # # (sig[j] * sig[j]') ./ sqrt(1 + rho * (sig[j] * sig[j]').^2) # S = sig[j] * sig[j]' S = matrix(0.0, (ns[j], ns[j])) blas.syrk(sv[j], S) Gamma[j][:] = div(S, sqrt(1.0 + rho * S**2))[:] symmetrize(Gamma[j], ns[j]) # As represents the scaled mapping # # As(x) = A(u * (Gamma .* x) * u') # As'(y) = Gamma .* (u' * A'(y) * u) # # stored in a similar format as A, except that we use packed # storage for the columns of As[i][j]. for i in xrange(M): if (type(A[i][j]) is matrix) or (type(A[i][j]) is spmatrix): # As[i][j][:,k] = vec( # (U[j]' * mat( A[i][j][:,k] ) * U[j]) .* Gamma[j]) copy(A[i][j], As[i][j]) As[i][j] = matrix(As[i][j]) for k in xrange(ms[i]): cngrnc(U[j], As[i][j], trans='T', offsetx=k * (ns[j]**2), n=ns[j]) blas.tbmv(Gamma[j], As[i][j], n=ns[j]**2, k=0, ldA=1, offsetx=k * (ns[j]**2)) # pack As[i][j] in place #pack_ip(As[i][j], ns[j]) for k in xrange(As[i][j].size[1]): tmp = +As[i][j][:, k] misc.pack2(tmp, {'l': 0, 'q': [], 's': [ns[j]]}) As[i][j][:, k] = tmp else: As[i][j] = 0.0 # H is an m times m block matrix with i, k block # # Hik = sum_j As[i,j]' * As[k,j] # # of size ms[i] x ms[k]. Hik = 0 if As[i,j] or As[k,j] # are zero for all j H = spmatrix([], [], [], (sum(ms), sum(ms))) rowid = 0 for i in xrange(M): colid = 0 for k in xrange(i + 1): sparse_block = True Hik = matrix(0.0, (ms[i], ms[k])) for j in xrange(N): if (type(As[i][j]) is matrix) and \ (type(As[k][j]) is matrix): sparse_block = False # Hik += As[i,j]' * As[k,j] if i == k: blas.syrk(As[i][j], Hik, trans='T', beta=1.0, k=ns[j] * (ns[j] + 1) / 2, ldA=ns[j]**2) else: blas.gemm(As[i][j], As[k][j], Hik, transA='T', beta=1.0, k=ns[j] * (ns[j] + 1) / 2, ldA=ns[j]**2, ldB=ns[j]**2) if not (sparse_block): H[rowid:rowid+ms[i], colid:colid+ms[k]] \ = sparse(Hik) colid += ms[k] rowid += ms[i] HF = cholmod.symbolic(H) cholmod.numeric(H, HF) def solve(x, y, z): """ Returns solution of rho * ux + A'(uy) - r^-T * uz * r^-1 = bx A(ux) = by -ux - r * uz * r' = bz. On entry, x = bx, y = by, z = bz. On exit, x = ux, y = uy, z = uz. """ # bz is a copy of z in the format of x blas.copy(z, bz) # x := x + rho * bz # = bx + rho * bz blas.axpy(bz, x, alpha=rho) # x := Gamma .* (u' * x * u) # = Gamma .* (u' * (bx + rho * bz) * u) offsetj = 0 for j in xrange(N): cngrnc(U[j], x, trans='T', offsetx=offsetj, n=ns[j]) blas.tbmv(Gamma[j], x, n=ns[j]**2, k=0, ldA=1, offsetx=offsetj) offsetj += ns[j]**2 # y := y - As(x) # := by - As( Gamma .* u' * (bx + rho * bz) * u) blas.copy(x, xp) offsetj = 0 for j in xrange(N): misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]}) offsetj += ns[j]**2 offseti = 0 for i in xrange(M): offsetj = 0 for j in xrange(N): if type(As[i][j]) is matrix: blas.gemv(As[i][j], xp, y, trans='T', alpha=-1.0, beta=1.0, m=ns[j] * (ns[j] + 1) / 2, n=ms[i], ldA=ns[j]**2, offsetx=offsetj, offsety=offseti) offsetj += ns[j]**2 offseti += ms[i] # y := -y - A(bz) # = -by - A(bz) + As(Gamma .* (u' * (bx + rho * bz) * u) Af(bz, y, alpha=-1.0, beta=-1.0) # y := H^-1 * y # = H^-1 ( -by - A(bz) + As(Gamma.* u'*(bx + rho*bz)*u) ) # = uy cholmod.solve(HF, y) # bz = Vt' * vz * Vt # = uz where # vz := Gamma .* ( As'(uy) - x ) # = Gamma .* ( As'(uy) - Gamma .* (u'*(bx + rho *bz)*u) ) # = Gamma.^2 .* ( u' * (A'(uy) - bx - rho * bz) * u ). blas.copy(x, xp) offsetj = 0 for j in xrange(N): # xp is -x[j] = -Gamma .* (u' * (bx + rho*bz) * u) # in packed storage misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]}) offsetj += ns[j]**2 blas.scal(-1.0, xp) offsetj = 0 for j in xrange(N): # xp += As'(uy) offseti = 0 for i in xrange(M): if type(As[i][j]) is matrix: blas.gemv(As[i][j], y, xp, alpha = 1.0, beta = 1.0, m = ns[j]*(ns[j]+1)/2, \ n = ms[i],ldA = ns[j]**2, \ offsetx = offseti, offsety = offsetj) offseti += ms[i] # bz[j] is xp unpacked and multiplied with Gamma #unpack(xp, bz[j], ns[j]) misc.unpack(xp, bz, { 'l': 0, 'q': [], 's': [ns[j]] }, offsetx=offsetj, offsety=offsetj) blas.tbmv(Gamma[j], bz, n=ns[j]**2, k=0, ldA=1, offsetx=offsetj) # bz = Vt' * bz * Vt # = uz cngrnc(Vt[j], bz, trans='T', offsetx=offsetj, n=ns[j]) symmetrize(bz, ns[j], offset=offsetj) offsetj += ns[j]**2 # x = -bz - r * uz * r' blas.copy(z, x) blas.copy(bz, z) offsetj = 0 for j in xrange(N): cngrnc(+W['r'][j], bz, offsetx=offsetj, n=ns[j]) offsetj += ns[j]**2 blas.axpy(bz, x) blas.scal(-1.0, x) return solve