def solve(x, y, z): """ 1. Solve for usx[0]: Asc'(Asc(usx[0])) = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG) where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U, bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1 2. Solve for usx[1]: usx[1] + S * usx[1] * S = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 usx[1] = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 - bsz1 + S * bsx[1] * S ) . / Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) . / Gamma Unscale ux[1] = Uti * usx[1] * Uti' 3. Compute usz0, usz1 r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T """ # z0 := U' * z0 * U # = bsz0 __cngrnc(U, z, trans='T') # z1 := Us' * bz1 * Us # = S^-1 * U' * bz1 * U * S^-1 # = S^-1 * bsz1 * S^-1 __cngrnc(Us, z, trans='T', offsetx=msq) # x[1] := Uti' * x[1] * Uti # = bsx[1] __cngrnc(Uti, x[1], trans='T') # x[1] := x[1] - z[msq:] # = bsx[1] - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], alpha=-1.0, offsetx=msq) # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG # = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG # = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG # in packed storage blas.copy(x[1], x1) blas.tbmv(S, x1, n=msq, k=0, ldA=1) blas.axpy(z, x1, n=msq) blas.tbsv(sqrtG, x1, n=msq, k=0, ldA=1) misc.pack2(x1, {'l': 0, 'q': [], 's': [m]}) # x[0] := x[0] + Asc'*x1 # = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) # = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma ) blas.gemv(Asc, x1, x[0], m=mpckd, trans='T', beta=1.0) # x[0] := H^-1 * x[0] # = ux[0] lapack.potrs(H, x[0]) # x1 = Asc(x[0]) .* sqrtG (unpacked) # = As(x[0]) blas.gemv(Asc, x[0], tmp, m=mpckd) misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]}) blas.tbmv(sqrtG, x1, n=msq, k=0, ldA=1) # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2 # = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1) # ./ Gamma # x[1] := x[1] - z[:msq] # = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], -1.0, n=msq) # x[1] := x[1] + x1 # = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(x1, x[1]) # x[1] := x[1] / Gammma # = (As(ux) + bsx[1] - bsz0 + S^-1 * bsz1 * S^-1 ) / Gamma # = S^-1 * usx[1] * S^-1 blas.tbsv(Gamma, x[1], n=msq, k=0, ldA=1) # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1 # := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 * U * r1 # := -r1' * uz1 * r1 blas.axpy(x[1], z, n=msq, offsety=msq) blas.scal(-1.0, z, offset=msq) __cngrnc(U, z, offsetx=msq) __cngrnc(W['r'][1], z, trans='T', offsetx=msq) # x[1] := S * x[1] * S # = usx1 blas.tbmv(S, x[1], n=msq, k=0, ldA=1) # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0 # = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0 # = r0' * U' * usz0 * U * r0 # = r0' * uz0 * r0 blas.axpy(x1, z, -1.0, n=msq) blas.scal(-1.0, z, n=msq) blas.axpy(x[1], z, -1.0, n=msq) __cngrnc(U, z) __cngrnc(W['r'][0], z, trans='T') # x[1] := Uti * x[1] * Uti' # = ux[1] __cngrnc(Uti, x[1])
def F(W): """ Generate a solver for A'(uz0) = bx[0] -uz0 - uz1 = bx[1] A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0 - ux[1] - r1*r1' * uz1 * r1*r1' = bz1. uz0, uz1, bz0, bz1 are symmetric m x m-matrices. ux[0], bx[0] are n-vectors. ux[1], bx[1] are symmetric m x m-matrices. We first calculate a congruence that diagonalizes r0*r0' and r1*r1': U' * r0 * r0' * U = I, U' * r1 * r1' * U = S. We then make a change of variables usx[0] = ux[0], usx[1] = U' * ux[1] * U usz0 = U^-1 * uz0 * U^-T usz1 = U^-1 * uz1 * U^-T and define As() = U' * A() * U' bsx[1] = U^-1 * bx[1] * U^-T bsz0 = U' * bz0 * U bsz1 = U' * bz1 * U. This gives As'(usz0) = bx[0] -usz0 - usz1 = bsx[1] As(usx[0]) - usx[1] - usz0 = bsz0 -usx[1] - S * usz1 * S = bsz1. 1. Eliminate usz0, usz1 using equations 3 and 4, usz0 = As(usx[0]) - usx[1] - bsz0 usz1 = -S^-1 * (usx[1] + bsz1) * S^-1. This gives two equations in usx[0] an usx[1]. As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0) -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1 = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1. 2. Eliminate usx[1] using equation 2: usx[1] + S * usx[1] * S = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j], usx[1] = ( S * As(usx[0]) * S ) ./ Gamma + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma. This gives an equation in usx[0]. As'( As(usx[0]) ./ Gamma ) = bx0 + As'(bsz0) + As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma ) = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ). """ # Calculate U s.t. # # U' * r0*r0' * U = I, U' * r1*r1' * U = diag(s). # Cholesky factorization r0 * r0' = L * L' blas.syrk(W['r'][0], L) lapack.potrf(L) # SVD L^-1 * r1 = U * diag(s) * V' blas.copy(W['r'][1], U) blas.trsm(L, U) lapack.gesvd(U, s, jobu='O') # s := s**2 s[:] = s**2 # Uti := U blas.copy(U, Uti) # U := L^-T * U blas.trsm(L, U, transA='T') # Uti := L * Uti = U^-T blas.trmm(L, Uti) # Us := U * diag(s)^-1 blas.copy(U, Us) for i in range(m): blas.tbsv(s, Us, n=m, k=0, ldA=1, incx=m, offsetx=i) # S is m x m with lower triangular entries s[i] * s[j] # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j]) # Upper triangular entries are undefined but nonzero. blas.scal(0.0, S) blas.syrk(s, S) Gamma = 1.0 + S sqrtG = sqrt(Gamma) # Asc[i] = (U' * Ai * * U ) ./ sqrtG, for i = 1, ..., n # = Asi ./ sqrt(Gamma) blas.copy(A, Asc) misc.scale( Asc, # only 'r' part of the dictionary is used { 'dnl': matrix(0.0, (0, 1)), 'dnli': matrix(0.0, (0, 1)), 'd': matrix(0.0, (0, 1)), 'di': matrix(0.0, (0, 1)), 'v': [], 'beta': [], 'r': [U], 'rti': [U] }) for i in range(n): blas.tbsv(sqrtG, Asc, n=msq, k=0, ldA=1, offsetx=i * msq) # Convert columns of Asc to packed storage misc.pack2(Asc, {'l': 0, 'q': [], 's': [m]}) # Cholesky factorization of Asc' * Asc. H = matrix(0.0, (n, n)) blas.syrk(Asc, H, trans='T', k=mpckd) lapack.potrf(H) def solve(x, y, z): """ 1. Solve for usx[0]: Asc'(Asc(usx[0])) = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG) where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U, bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1 2. Solve for usx[1]: usx[1] + S * usx[1] * S = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 usx[1] = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 - bsz1 + S * bsx[1] * S ) . / Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) . / Gamma Unscale ux[1] = Uti * usx[1] * Uti' 3. Compute usz0, usz1 r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T """ # z0 := U' * z0 * U # = bsz0 __cngrnc(U, z, trans='T') # z1 := Us' * bz1 * Us # = S^-1 * U' * bz1 * U * S^-1 # = S^-1 * bsz1 * S^-1 __cngrnc(Us, z, trans='T', offsetx=msq) # x[1] := Uti' * x[1] * Uti # = bsx[1] __cngrnc(Uti, x[1], trans='T') # x[1] := x[1] - z[msq:] # = bsx[1] - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], alpha=-1.0, offsetx=msq) # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG # = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG # = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG # in packed storage blas.copy(x[1], x1) blas.tbmv(S, x1, n=msq, k=0, ldA=1) blas.axpy(z, x1, n=msq) blas.tbsv(sqrtG, x1, n=msq, k=0, ldA=1) misc.pack2(x1, {'l': 0, 'q': [], 's': [m]}) # x[0] := x[0] + Asc'*x1 # = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) # = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma ) blas.gemv(Asc, x1, x[0], m=mpckd, trans='T', beta=1.0) # x[0] := H^-1 * x[0] # = ux[0] lapack.potrs(H, x[0]) # x1 = Asc(x[0]) .* sqrtG (unpacked) # = As(x[0]) blas.gemv(Asc, x[0], tmp, m=mpckd) misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]}) blas.tbmv(sqrtG, x1, n=msq, k=0, ldA=1) # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2 # = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1) # ./ Gamma # x[1] := x[1] - z[:msq] # = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], -1.0, n=msq) # x[1] := x[1] + x1 # = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(x1, x[1]) # x[1] := x[1] / Gammma # = (As(ux) + bsx[1] - bsz0 + S^-1 * bsz1 * S^-1 ) / Gamma # = S^-1 * usx[1] * S^-1 blas.tbsv(Gamma, x[1], n=msq, k=0, ldA=1) # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1 # := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 * U * r1 # := -r1' * uz1 * r1 blas.axpy(x[1], z, n=msq, offsety=msq) blas.scal(-1.0, z, offset=msq) __cngrnc(U, z, offsetx=msq) __cngrnc(W['r'][1], z, trans='T', offsetx=msq) # x[1] := S * x[1] * S # = usx1 blas.tbmv(S, x[1], n=msq, k=0, ldA=1) # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0 # = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0 # = r0' * U' * usz0 * U * r0 # = r0' * uz0 * r0 blas.axpy(x1, z, -1.0, n=msq) blas.scal(-1.0, z, n=msq) blas.axpy(x[1], z, -1.0, n=msq) __cngrnc(U, z) __cngrnc(W['r'][0], z, trans='T') # x[1] := Uti * x[1] * Uti' # = ux[1] __cngrnc(Uti, x[1]) return solve
def solve(x, y, z): """ 1. Solve for usx[0]: Asc'(Asc(usx[0])) = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG) where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U, bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1 2. Solve for usx[1]: usx[1] + S * usx[1] * S = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 usx[1] = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 - bsz1 + S * bsx[1] * S ) . / Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) . / Gamma Unscale ux[1] = Uti * usx[1] * Uti' 3. Compute usz0, usz1 r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T """ # z0 := U' * z0 * U # = bsz0 __cngrnc(U, z, trans = 'T') # z1 := Us' * bz1 * Us # = S^-1 * U' * bz1 * U * S^-1 # = S^-1 * bsz1 * S^-1 __cngrnc(Us, z, trans = 'T', offsetx = msq) # x[1] := Uti' * x[1] * Uti # = bsx[1] __cngrnc(Uti, x[1], trans = 'T') # x[1] := x[1] - z[msq:] # = bsx[1] - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], alpha = -1.0, offsetx = msq) # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG # = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG # = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG # in packed storage blas.copy(x[1], x1) blas.tbmv(S, x1, n = msq, k = 0, ldA = 1) blas.axpy(z, x1, n = msq) blas.tbsv(sqrtG, x1, n = msq, k = 0, ldA = 1) misc.pack2(x1, {'l': 0, 'q': [], 's': [m]}) # x[0] := x[0] + Asc'*x1 # = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) # = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma ) blas.gemv(Asc, x1, x[0], m = mpckd, trans = 'T', beta = 1.0) # x[0] := H^-1 * x[0] # = ux[0] lapack.potrs(H, x[0]) # x1 = Asc(x[0]) .* sqrtG (unpacked) # = As(x[0]) blas.gemv(Asc, x[0], tmp, m = mpckd) misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]}) blas.tbmv(sqrtG, x1, n = msq, k = 0, ldA = 1) # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2 # = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1) # ./ Gamma # x[1] := x[1] - z[:msq] # = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], -1.0, n = msq) # x[1] := x[1] + x1 # = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(x1, x[1]) # x[1] := x[1] / Gammma # = (As(ux) + bsx[1] - bsz0 + S^-1 * bsz1 * S^-1 ) / Gamma # = S^-1 * usx[1] * S^-1 blas.tbsv(Gamma, x[1], n = msq, k = 0, ldA = 1) # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1 # := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 * U * r1 # := -r1' * uz1 * r1 blas.axpy(x[1], z, n = msq, offsety = msq) blas.scal(-1.0, z, offset = msq) __cngrnc(U, z, offsetx = msq) __cngrnc(W['r'][1], z, trans = 'T', offsetx = msq) # x[1] := S * x[1] * S # = usx1 blas.tbmv(S, x[1], n = msq, k = 0, ldA = 1) # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0 # = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0 # = r0' * U' * usz0 * U * r0 # = r0' * uz0 * r0 blas.axpy(x1, z, -1.0, n = msq) blas.scal(-1.0, z, n = msq) blas.axpy(x[1], z, -1.0, n = msq) __cngrnc(U, z) __cngrnc(W['r'][0], z, trans = 'T') # x[1] := Uti * x[1] * Uti' # = ux[1] __cngrnc(Uti, x[1])
def F(W): """ Generate a solver for A'(uz0) = bx[0] -uz0 - uz1 = bx[1] A(ux[0]) - ux[1] - r0*r0' * uz0 * r0*r0' = bz0 - ux[1] - r1*r1' * uz1 * r1*r1' = bz1. uz0, uz1, bz0, bz1 are symmetric m x m-matrices. ux[0], bx[0] are n-vectors. ux[1], bx[1] are symmetric m x m-matrices. We first calculate a congruence that diagonalizes r0*r0' and r1*r1': U' * r0 * r0' * U = I, U' * r1 * r1' * U = S. We then make a change of variables usx[0] = ux[0], usx[1] = U' * ux[1] * U usz0 = U^-1 * uz0 * U^-T usz1 = U^-1 * uz1 * U^-T and define As() = U' * A() * U' bsx[1] = U^-1 * bx[1] * U^-T bsz0 = U' * bz0 * U bsz1 = U' * bz1 * U. This gives As'(usz0) = bx[0] -usz0 - usz1 = bsx[1] As(usx[0]) - usx[1] - usz0 = bsz0 -usx[1] - S * usz1 * S = bsz1. 1. Eliminate usz0, usz1 using equations 3 and 4, usz0 = As(usx[0]) - usx[1] - bsz0 usz1 = -S^-1 * (usx[1] + bsz1) * S^-1. This gives two equations in usx[0] an usx[1]. As'(As(usx[0]) - usx[1]) = bx[0] + As'(bsz0) -As(usx[0]) + usx[1] + S^-1 * usx[1] * S^-1 = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1. 2. Eliminate usx[1] using equation 2: usx[1] + S * usx[1] * S = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 i.e., with Gamma[i,j] = 1.0 + S[i,i] * S[j,j], usx[1] = ( S * As(usx[0]) * S ) ./ Gamma + ( S * ( bsx[1] - bsz0 ) * S - bsz1 ) ./ Gamma. This gives an equation in usx[0]. As'( As(usx[0]) ./ Gamma ) = bx0 + As'(bsz0) + As'( (S * ( bsx[1] - bsz0 ) * S - bsz1) ./ Gamma ) = bx0 + As'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ Gamma ). """ # Calculate U s.t. # # U' * r0*r0' * U = I, U' * r1*r1' * U = diag(s). # Cholesky factorization r0 * r0' = L * L' blas.syrk(W['r'][0], L) lapack.potrf(L) # SVD L^-1 * r1 = U * diag(s) * V' blas.copy(W['r'][1], U) blas.trsm(L, U) lapack.gesvd(U, s, jobu = 'O') # s := s**2 s[:] = s**2 # Uti := U blas.copy(U, Uti) # U := L^-T * U blas.trsm(L, U, transA = 'T') # Uti := L * Uti = U^-T blas.trmm(L, Uti) # Us := U * diag(s)^-1 blas.copy(U, Us) for i in range(m): blas.tbsv(s, Us, n = m, k = 0, ldA = 1, incx = m, offsetx = i) # S is m x m with lower triangular entries s[i] * s[j] # sqrtG is m x m with lower triangular entries sqrt(1.0 + s[i]*s[j]) # Upper triangular entries are undefined but nonzero. blas.scal(0.0, S) blas.syrk(s, S) Gamma = 1.0 + S sqrtG = sqrt(Gamma) # Asc[i] = (U' * Ai * * U ) ./ sqrtG, for i = 1, ..., n # = Asi ./ sqrt(Gamma) blas.copy(A, Asc) misc.scale(Asc, # only 'r' part of the dictionary is used {'dnl': matrix(0.0, (0, 1)), 'dnli': matrix(0.0, (0, 1)), 'd': matrix(0.0, (0, 1)), 'di': matrix(0.0, (0, 1)), 'v': [], 'beta': [], 'r': [ U ], 'rti': [ U ]}) for i in range(n): blas.tbsv(sqrtG, Asc, n = msq, k = 0, ldA = 1, offsetx = i*msq) # Convert columns of Asc to packed storage misc.pack2(Asc, {'l': 0, 'q': [], 's': [ m ]}) # Cholesky factorization of Asc' * Asc. H = matrix(0.0, (n, n)) blas.syrk(Asc, H, trans = 'T', k = mpckd) lapack.potrf(H) def solve(x, y, z): """ 1. Solve for usx[0]: Asc'(Asc(usx[0])) = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) = bx0 + Asc'( ( bsz0 + S * ( bsx[1] - bssz1) S ) ./ sqrtG) where bsx[1] = U^-1 * bx[1] * U^-T, bsz0 = U' * bz0 * U, bsz1 = U' * bz1 * U, bssz1 = S^-1 * bsz1 * S^-1 2. Solve for usx[1]: usx[1] + S * usx[1] * S = S * ( As(usx[0]) + bsx[1] - bsz0 ) * S - bsz1 usx[1] = ( S * (As(usx[0]) + bsx[1] - bsz0) * S - bsz1) ./ Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 - bsz1 + S * bsx[1] * S ) . / Gamma = -bsz0 + (S * As(usx[0]) * S) ./ Gamma + (bsz0 + S * ( bsx[1] - bssz1 ) * S ) . / Gamma Unscale ux[1] = Uti * usx[1] * Uti' 3. Compute usz0, usz1 r0' * uz0 * r0 = r0^-1 * ( A(ux[0]) - ux[1] - bz0 ) * r0^-T r1' * uz1 * r1 = r1^-1 * ( -ux[1] - bz1 ) * r1^-T """ # z0 := U' * z0 * U # = bsz0 __cngrnc(U, z, trans = 'T') # z1 := Us' * bz1 * Us # = S^-1 * U' * bz1 * U * S^-1 # = S^-1 * bsz1 * S^-1 __cngrnc(Us, z, trans = 'T', offsetx = msq) # x[1] := Uti' * x[1] * Uti # = bsx[1] __cngrnc(Uti, x[1], trans = 'T') # x[1] := x[1] - z[msq:] # = bsx[1] - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], alpha = -1.0, offsetx = msq) # x1 = (S * x[1] * S + z[:msq] ) ./ sqrtG # = (S * ( bsx[1] - S^-1 * bsz1 * S^-1) * S + bsz0 ) ./ sqrtG # = (S * bsx[1] * S - bsz1 + bsz0 ) ./ sqrtG # in packed storage blas.copy(x[1], x1) blas.tbmv(S, x1, n = msq, k = 0, ldA = 1) blas.axpy(z, x1, n = msq) blas.tbsv(sqrtG, x1, n = msq, k = 0, ldA = 1) misc.pack2(x1, {'l': 0, 'q': [], 's': [m]}) # x[0] := x[0] + Asc'*x1 # = bx0 + Asc'( ( bsz0 - bsz1 + S * bsx[1] * S ) ./ sqrtG) # = bx0 + As'( ( bz0 - bz1 + S * bx[1] * S ) ./ Gamma ) blas.gemv(Asc, x1, x[0], m = mpckd, trans = 'T', beta = 1.0) # x[0] := H^-1 * x[0] # = ux[0] lapack.potrs(H, x[0]) # x1 = Asc(x[0]) .* sqrtG (unpacked) # = As(x[0]) blas.gemv(Asc, x[0], tmp, m = mpckd) misc.unpack(tmp, x1, {'l': 0, 'q': [], 's': [m]}) blas.tbmv(sqrtG, x1, n = msq, k = 0, ldA = 1) # usx[1] = (x1 + (x[1] - z[:msq])) ./ sqrtG**2 # = (As(ux[0]) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1) # ./ Gamma # x[1] := x[1] - z[:msq] # = bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(z, x[1], -1.0, n = msq) # x[1] := x[1] + x1 # = As(ux) + bsx[1] - bsz0 - S^-1 * bsz1 * S^-1 blas.axpy(x1, x[1]) # x[1] := x[1] / Gammma # = (As(ux) + bsx[1] - bsz0 + S^-1 * bsz1 * S^-1 ) / Gamma # = S^-1 * usx[1] * S^-1 blas.tbsv(Gamma, x[1], n = msq, k = 0, ldA = 1) # z[msq:] := r1' * U * (-z[msq:] - x[1]) * U * r1 # := -r1' * U * S^-1 * (bsz1 + ux[1]) * S^-1 * U * r1 # := -r1' * uz1 * r1 blas.axpy(x[1], z, n = msq, offsety = msq) blas.scal(-1.0, z, offset = msq) __cngrnc(U, z, offsetx = msq) __cngrnc(W['r'][1], z, trans = 'T', offsetx = msq) # x[1] := S * x[1] * S # = usx1 blas.tbmv(S, x[1], n = msq, k = 0, ldA = 1) # z[:msq] = r0' * U' * ( x1 - x[1] - z[:msq] ) * U * r0 # = r0' * U' * ( As(ux) - usx1 - bsz0 ) * U * r0 # = r0' * U' * usz0 * U * r0 # = r0' * uz0 * r0 blas.axpy(x1, z, -1.0, n = msq) blas.scal(-1.0, z, n = msq) blas.axpy(x[1], z, -1.0, n = msq) __cngrnc(U, z) __cngrnc(W['r'][0], z, trans = 'T') # x[1] := Uti * x[1] * Uti' # = ux[1] __cngrnc(Uti, x[1]) return solve
def solve(x, y, z): """ Returns solution of rho * ux + A'(uy) - r^-T * uz * r^-1 = bx A(ux) = by -ux - r * uz * r' = bz. On entry, x = bx, y = by, z = bz. On exit, x = ux, y = uy, z = uz. """ # bz is a copy of z in the format of x blas.copy(z, bz) # x := x + rho * bz # = bx + rho * bz blas.axpy(bz, x, alpha=rho) # x := Gamma .* (u' * x * u) # = Gamma .* (u' * (bx + rho * bz) * u) offsetj = 0 for j in xrange(N): cngrnc(U[j], x, trans='T', offsetx=offsetj, n=ns[j]) blas.tbmv(Gamma[j], x, n=ns[j]**2, k=0, ldA=1, offsetx=offsetj) offsetj += ns[j]**2 # y := y - As(x) # := by - As( Gamma .* u' * (bx + rho * bz) * u) blas.copy(x, xp) offsetj = 0 for j in xrange(N): misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]}) offsetj += ns[j]**2 offseti = 0 for i in xrange(M): offsetj = 0 for j in xrange(N): if type(As[i][j]) is matrix: blas.gemv(As[i][j], xp, y, trans='T', alpha=-1.0, beta=1.0, m=ns[j] * (ns[j] + 1) / 2, n=ms[i], ldA=ns[j]**2, offsetx=offsetj, offsety=offseti) offsetj += ns[j]**2 offseti += ms[i] # y := -y - A(bz) # = -by - A(bz) + As(Gamma .* (u' * (bx + rho * bz) * u) Af(bz, y, alpha=-1.0, beta=-1.0) # y := H^-1 * y # = H^-1 ( -by - A(bz) + As(Gamma.* u'*(bx + rho*bz)*u) ) # = uy cholmod.solve(HF, y) # bz = Vt' * vz * Vt # = uz where # vz := Gamma .* ( As'(uy) - x ) # = Gamma .* ( As'(uy) - Gamma .* (u'*(bx + rho *bz)*u) ) # = Gamma.^2 .* ( u' * (A'(uy) - bx - rho * bz) * u ). blas.copy(x, xp) offsetj = 0 for j in xrange(N): # xp is -x[j] = -Gamma .* (u' * (bx + rho*bz) * u) # in packed storage misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]}) offsetj += ns[j]**2 blas.scal(-1.0, xp) offsetj = 0 for j in xrange(N): # xp += As'(uy) offseti = 0 for i in xrange(M): if type(As[i][j]) is matrix: blas.gemv(As[i][j], y, xp, alpha = 1.0, beta = 1.0, m = ns[j]*(ns[j]+1)/2, \ n = ms[i],ldA = ns[j]**2, \ offsetx = offseti, offsety = offsetj) offseti += ms[i] # bz[j] is xp unpacked and multiplied with Gamma #unpack(xp, bz[j], ns[j]) misc.unpack(xp, bz, { 'l': 0, 'q': [], 's': [ns[j]] }, offsetx=offsetj, offsety=offsetj) blas.tbmv(Gamma[j], bz, n=ns[j]**2, k=0, ldA=1, offsetx=offsetj) # bz = Vt' * bz * Vt # = uz cngrnc(Vt[j], bz, trans='T', offsetx=offsetj, n=ns[j]) symmetrize(bz, ns[j], offset=offsetj) offsetj += ns[j]**2 # x = -bz - r * uz * r' blas.copy(z, x) blas.copy(bz, z) offsetj = 0 for j in xrange(N): cngrnc(+W['r'][j], bz, offsetx=offsetj, n=ns[j]) offsetj += ns[j]**2 blas.axpy(bz, x) blas.scal(-1.0, x)
def F(W): for j in xrange(N): # SVD R[j] = U[j] * diag(sig[j]) * Vt[j] lapack.gesvd(+W['r'][j], sv[j], jobu='A', jobvt='A', U=U[j], Vt=Vt[j]) # Vt[j] := diag(sig[j])^-1 * Vt[j] for k in xrange(ns[j]): blas.tbsv(sv[j], Vt[j], n=ns[j], k=0, ldA=1, offsetx=k * ns[j]) # Gamma[j] is an ns[j] x ns[j] symmetric matrix # # (sig[j] * sig[j]') ./ sqrt(1 + rho * (sig[j] * sig[j]').^2) # S = sig[j] * sig[j]' S = matrix(0.0, (ns[j], ns[j])) blas.syrk(sv[j], S) Gamma[j][:] = div(S, sqrt(1.0 + rho * S**2))[:] symmetrize(Gamma[j], ns[j]) # As represents the scaled mapping # # As(x) = A(u * (Gamma .* x) * u') # As'(y) = Gamma .* (u' * A'(y) * u) # # stored in a similar format as A, except that we use packed # storage for the columns of As[i][j]. for i in xrange(M): if (type(A[i][j]) is matrix) or (type(A[i][j]) is spmatrix): # As[i][j][:,k] = vec( # (U[j]' * mat( A[i][j][:,k] ) * U[j]) .* Gamma[j]) copy(A[i][j], As[i][j]) As[i][j] = matrix(As[i][j]) for k in xrange(ms[i]): cngrnc(U[j], As[i][j], trans='T', offsetx=k * (ns[j]**2), n=ns[j]) blas.tbmv(Gamma[j], As[i][j], n=ns[j]**2, k=0, ldA=1, offsetx=k * (ns[j]**2)) # pack As[i][j] in place #pack_ip(As[i][j], ns[j]) for k in xrange(As[i][j].size[1]): tmp = +As[i][j][:, k] misc.pack2(tmp, {'l': 0, 'q': [], 's': [ns[j]]}) As[i][j][:, k] = tmp else: As[i][j] = 0.0 # H is an m times m block matrix with i, k block # # Hik = sum_j As[i,j]' * As[k,j] # # of size ms[i] x ms[k]. Hik = 0 if As[i,j] or As[k,j] # are zero for all j H = spmatrix([], [], [], (sum(ms), sum(ms))) rowid = 0 for i in xrange(M): colid = 0 for k in xrange(i + 1): sparse_block = True Hik = matrix(0.0, (ms[i], ms[k])) for j in xrange(N): if (type(As[i][j]) is matrix) and \ (type(As[k][j]) is matrix): sparse_block = False # Hik += As[i,j]' * As[k,j] if i == k: blas.syrk(As[i][j], Hik, trans='T', beta=1.0, k=ns[j] * (ns[j] + 1) / 2, ldA=ns[j]**2) else: blas.gemm(As[i][j], As[k][j], Hik, transA='T', beta=1.0, k=ns[j] * (ns[j] + 1) / 2, ldA=ns[j]**2, ldB=ns[j]**2) if not (sparse_block): H[rowid:rowid+ms[i], colid:colid+ms[k]] \ = sparse(Hik) colid += ms[k] rowid += ms[i] HF = cholmod.symbolic(H) cholmod.numeric(H, HF) def solve(x, y, z): """ Returns solution of rho * ux + A'(uy) - r^-T * uz * r^-1 = bx A(ux) = by -ux - r * uz * r' = bz. On entry, x = bx, y = by, z = bz. On exit, x = ux, y = uy, z = uz. """ # bz is a copy of z in the format of x blas.copy(z, bz) # x := x + rho * bz # = bx + rho * bz blas.axpy(bz, x, alpha=rho) # x := Gamma .* (u' * x * u) # = Gamma .* (u' * (bx + rho * bz) * u) offsetj = 0 for j in xrange(N): cngrnc(U[j], x, trans='T', offsetx=offsetj, n=ns[j]) blas.tbmv(Gamma[j], x, n=ns[j]**2, k=0, ldA=1, offsetx=offsetj) offsetj += ns[j]**2 # y := y - As(x) # := by - As( Gamma .* u' * (bx + rho * bz) * u) blas.copy(x, xp) offsetj = 0 for j in xrange(N): misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]}) offsetj += ns[j]**2 offseti = 0 for i in xrange(M): offsetj = 0 for j in xrange(N): if type(As[i][j]) is matrix: blas.gemv(As[i][j], xp, y, trans='T', alpha=-1.0, beta=1.0, m=ns[j] * (ns[j] + 1) / 2, n=ms[i], ldA=ns[j]**2, offsetx=offsetj, offsety=offseti) offsetj += ns[j]**2 offseti += ms[i] # y := -y - A(bz) # = -by - A(bz) + As(Gamma .* (u' * (bx + rho * bz) * u) Af(bz, y, alpha=-1.0, beta=-1.0) # y := H^-1 * y # = H^-1 ( -by - A(bz) + As(Gamma.* u'*(bx + rho*bz)*u) ) # = uy cholmod.solve(HF, y) # bz = Vt' * vz * Vt # = uz where # vz := Gamma .* ( As'(uy) - x ) # = Gamma .* ( As'(uy) - Gamma .* (u'*(bx + rho *bz)*u) ) # = Gamma.^2 .* ( u' * (A'(uy) - bx - rho * bz) * u ). blas.copy(x, xp) offsetj = 0 for j in xrange(N): # xp is -x[j] = -Gamma .* (u' * (bx + rho*bz) * u) # in packed storage misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]}) offsetj += ns[j]**2 blas.scal(-1.0, xp) offsetj = 0 for j in xrange(N): # xp += As'(uy) offseti = 0 for i in xrange(M): if type(As[i][j]) is matrix: blas.gemv(As[i][j], y, xp, alpha = 1.0, beta = 1.0, m = ns[j]*(ns[j]+1)/2, \ n = ms[i],ldA = ns[j]**2, \ offsetx = offseti, offsety = offsetj) offseti += ms[i] # bz[j] is xp unpacked and multiplied with Gamma #unpack(xp, bz[j], ns[j]) misc.unpack(xp, bz, { 'l': 0, 'q': [], 's': [ns[j]] }, offsetx=offsetj, offsety=offsetj) blas.tbmv(Gamma[j], bz, n=ns[j]**2, k=0, ldA=1, offsetx=offsetj) # bz = Vt' * bz * Vt # = uz cngrnc(Vt[j], bz, trans='T', offsetx=offsetj, n=ns[j]) symmetrize(bz, ns[j], offset=offsetj) offsetj += ns[j]**2 # x = -bz - r * uz * r' blas.copy(z, x) blas.copy(bz, z) offsetj = 0 for j in xrange(N): cngrnc(+W['r'][j], bz, offsetx=offsetj, n=ns[j]) offsetj += ns[j]**2 blas.axpy(bz, x) blas.scal(-1.0, x) return solve