def classifier(Y, soft=False):
    """
    Evaluate the tanh-kernel classifier on the rows of Y.

    The kernel block is tanh(Y*Xr'/sigma - theta); the decision values
    are kernel * zr + b.  theta, sigma, Xr, Nr, zr and b are taken from
    the enclosing scope.

    Returns the decision values when soft is true, otherwise a matrix
    with entries +1 / -1 (the sign of each decision value, with values
    that are not strictly positive mapped to -1).
    """
    nrows = Y.size[0]

    # Start from theta in every entry so that gemm with beta = -1 gives
    # kern = Y*Xr'/sigma - theta.
    kern = matrix(theta, (nrows, Nr))
    blas.gemm(Y, Xr, kern, transB='T', alpha=1.0 / sigma, beta=-1.0)

    # tanh(u) evaluated as (e^u - e^-u) / (e^u + e^-u).
    kern = exp(kern)
    scores = div(kern - kern**-1, kern + kern**-1) * zr + b

    if not soft:
        return matrix([2 * (val > 0.0) - 1 for val in scores])
    return scores
def classifier(Y, soft=False):
    """
    Evaluate the polynomial-kernel classifier on the rows of Y.

    The kernel block is (Y*X'/sigma) raised elementwise to the power
    'degree'; the decision values are kernel * z + b.  X, N, sigma,
    degree, z and b are taken from the enclosing scope.

    Returns the decision values when soft is true, otherwise a matrix
    with entries +1 / -1.
    """
    nrows = Y.size[0]

    # kern = Y*X' / sigma
    kern = matrix(0.0, (nrows, N))
    blas.gemm(Y, X, kern, transB='T', alpha=1.0 / sigma)

    scores = kern**degree * z + b

    if not soft:
        return matrix([2 * (val > 0.0) - 1 for val in scores])
    return scores
def P(x, y, alpha=1.0, beta=0.0):
    """
    In-place evaluation of  y := alpha * X * X' * x + beta * y.

    x and y are N x m matrices; X, n and m come from the enclosing
    scope.  The product is formed in two steps so that X*X' is never
    built explicitly.  Nothing is returned; y is overwritten.
    """
    # Intermediate n x m product  xtx = X' * x.
    xtx = matrix(0.0, (n, m))
    blas.gemm(X, x, xtx, transA='T')

    # y := alpha * X * xtx + beta * y
    blas.gemm(X, xtx, y, alpha=alpha, beta=beta)
def F(W):
    """
    Build and return a function f(x, y, z) that solves

        -diag(z)                = bx
        -diag(x) - r*r'*z*r*r'  = bz

    where r = W['r'][0] = W['rti'][0]^{-T}.

    The factorization that only depends on W is computed once here and
    captured by the returned closure.
    """
    rti = W['rti'][0]

    # gram = rti*rti', stored as a full (nonsymmetric) n x n matrix.
    gram = matrix(0.0, (n, n))
    blas.gemm(rti, rti, gram, transB='T')

    # Cholesky factorization of the elementwise square gram .* gram.
    gram_sq = gram**2
    lapack.potrf(gram_sq)

    def f(x, y, z):
        """
        On entry, x holds bx, y is empty, and z holds bz in column major
        order.  On exit, x and z hold the solution, with z scaled:
        vec(r'*z*r) is returned instead of z.

        The method first solves

            ((rti*rti') .* (rti*rti')) * x = bx - diag(t*bz*t),

        with t = rti*rti', and then sets

            z = -rti' * (diag(x) + bz) * rti.
        """
        # work := t * bz * t, computed on a copy so that z is preserved.
        work = +z
        cngrnc(gram, work)

        # x := bx - diag(t * bz * t)
        x -= work[::n + 1]

        # x := (t.*t)^{-1} * x
        lapack.potrs(gram_sq, x)

        # z := bz + diag(x)
        z[::n + 1] += x

        # z := -vec(rti' * (diag(x) + bz) * rti)
        cngrnc(rti, z, alpha=-1.0)

    return f
def classifier2(Y, soft=False):
    """
    Evaluate the classifier built from the completed (low-rank plus
    Cholesky) linear kernel on the rows of Y.

    X, sigma, width, N, L11, Lc, z and b are taken from the enclosing
    scope.  Returns the decision values when soft is true, otherwise a
    matrix with entries +1 / -1.
    """
    npts = Y.size[0]

    # Leading 'width' rows of X * Y' / sigma.
    cols = matrix(0., (width, npts))
    blas.gemm(X, Y, cols, transB='T', alpha=1.0 / sigma, m=width)

    # Solve with the Cholesky factor L11, then pad with zero rows up to
    # N rows before applying the two triangular solves with Lc.
    lapack.potrs(L11, cols)
    cols = matrix([cols, matrix(0., (N - width, npts))])
    chompack.trsm(Lc, cols, trans='N')
    chompack.trsm(Lc, cols, trans='T')

    # scores := cols' * z + b
    scores = matrix(b, (npts, 1))
    blas.gemv(cols, z, scores, trans='T', beta=1.0)

    if not soft:
        return matrix([2 * (val > 0.0) - 1 for val in scores])
    return scores
def classifier(Y):
    """
    Multiclass polynomial-kernel classifier.

    For every row of Y the class scores are the corresponding row of
    (Y*X'/sigma)**degree * U.  X, N, sigma, degree, U and the number of
    classes m are taken from the enclosing scope.

    Returns a list with one class index per row of Y: the index of the
    largest score.  When several classes share the largest score the
    highest class index is returned.
    """
    nrows = Y.size[0]

    # K = Y * X' / sigma
    K = matrix(0.0, (nrows, N))
    blas.gemm(Y, X, K, transB='T', alpha=1.0 / sigma)

    S = K**degree * U

    labels = []
    for i in range(nrows):
        # sorted() accepts the iterator that zip() returns on Python 3;
        # calling .sort() on the zip result only works on Python 2.
        ranked = sorted(zip(list(S[i, :]), range(m)), reverse=True)
        labels.append(ranked[0][1])
    return labels
def classifier2(Y, soft=False):
    """
    Evaluate the classifier built from the completed polynomial kernel
    on the rows of Y.

    When Y is None the precomputed value zs from the enclosing scope is
    returned.  Otherwise X, sigma, degree, width, N, L11, Lc, z and b
    are taken from the enclosing scope, and the function returns the
    decision values when soft is true or a matrix with entries +1 / -1
    when it is false.
    """
    if Y is None:
        return zs

    npts = Y.size[0]

    # Leading 'width' rows of (X * Y' / sigma) .^ degree.
    cols = matrix(0., (width, npts))
    blas.gemm(X, Y, cols, transB='T', alpha=1.0 / sigma, m=width)
    cols = cols**degree

    # Solve with the Cholesky factor L11, pad with zero rows up to N
    # rows, then apply the two chompack solves with Lc.
    lapack.potrs(L11, cols)
    cols = matrix([cols, matrix(0., (N - width, npts))])
    chompack.solve(Lc, cols, mode=0)
    chompack.solve(Lc, cols, mode=1)

    # scores := cols' * z + b
    scores = matrix(b, (npts, 1))
    blas.gemv(cols, z, scores, trans='T', beta=1.0)

    if not soft:
        return matrix([2 * (val > 0.0) - 1 for val in scores])
    return scores
def Fkkt(W):
    """
    Return a KKT solver f(x, y, z) for the scaling W.

    The W-dependent work (forming t = rti*rti' and factoring t.*t) is
    done once here; the returned closure reuses it for every solve.
    """
    rti = W['rti'][0]

    # tmat = rti*rti', stored as a full (nonsymmetric) n x n matrix.
    tmat = matrix(0.0, (n, n))
    blas.gemm(rti, rti, tmat, transB='T')

    # Cholesky factorization of the elementwise square tmat .* tmat.
    tmat_sq = tmat**2
    lapack.potrf(tmat_sq)

    def f(x, y, z):
        """
        Solve

            -diag(z)                                 = bx
            -diag(x) - inv(rti*rti') * z * inv(rti*rti') = bs

        On entry, x and z contain bx and bs.  On exit, they contain the
        solution, with z scaled (inv(rti)'*z*inv(rti) is returned
        instead of z).

        We first solve

            ((rti*rti') .* (rti*rti')) * x = bx - diag(t*bs*t)

        and then take z = -rti' * (diag(x) + bs) * rti.
        """
        # work := t * bs * t, on an n x n copy of z.
        work = matrix(z, (n, n))
        cngrnc(tmat, work)

        # x := bx - diag(t * bs * t)
        x -= work[::n + 1]

        # x := (t.*t)^{-1} * x
        lapack.potrs(tmat_sq, x)

        # z := bs + diag(x)
        z[::n + 1] += x

        # z := -rti' * (diag(x) + bs) * rti
        cngrnc(rti, z, alpha=-1.0)

    return f
def Fkkt(W):
    """
    Construct the custom KKT solver for the scaling W.

    Precomputes t = rti*rti' and the Cholesky factor of t.*t, and
    returns a closure f(x, y, z) that uses them.
    """
    rti = W['rti'][0]

    # t_full = rti*rti' kept as a dense, nonsymmetric n x n matrix.
    t_full = matrix(0.0, (n, n))
    blas.gemm(rti, rti, t_full, transB='T')

    # Factor the Hadamard square of t_full.
    t_hadamard = t_full**2
    lapack.potrf(t_hadamard)

    def f(x, y, z):
        """
        Solve

            -diag(z)                                 = bx
            -diag(x) - inv(rti*rti') * z * inv(rti*rti') = bs

        On entry, x and z contain bx and bs.  On exit, they contain the
        solution, with z scaled (inv(rti)'*z*inv(rti) is returned
        instead of z).

        The solution is obtained from

            ((rti*rti') .* (rti*rti')) * x = bx - diag(t*bs*t),
            z = -rti' * (diag(x) + bs) * rti.
        """
        # scaled_bs := t * bs * t, computed on an n x n copy of z.
        scaled_bs = matrix(z, (n, n))
        cngrnc(t_full, scaled_bs)

        # Right-hand side bx - diag(t * bs * t), then the Cholesky solve.
        x -= scaled_bs[::n + 1]
        lapack.potrs(t_hadamard, x)

        # Add x to the diagonal of bs and apply the congruence with rti,
        # negated:  z := -rti' * (diag(x) + bs) * rti.
        z[::n + 1] += x
        cngrnc(rti, z, alpha=-1.0)

    return f
def classifier(Y, soft=False):
    """
    Evaluate the RBF-kernel classifier on the rows of Y.

    The kernel block is exp(K) with

        K[i,j] = yi'*xj / sigma - 0.5 * (c[i] + a[sv][j]),

    where c[i] = ||yi||^2 / sigma; the decision values are
    exp(K) * zr + b.  Xr, Nr, n, sigma, a, sv, zr and b are taken from
    the enclosing scope.

    Returns the decision values when soft is true, otherwise a matrix
    with entries +1 / -1.
    """
    npts = Y.size[0]

    # K = Y*Xr' / sigma
    K = matrix(0.0, (npts, Nr))
    blas.gemm(Y, Xr, K, transB='T', alpha=1.0 / sigma)

    # c[i] = ||Yi||^2 / sigma.  One vector of ones, long enough for
    # every use below, is shared by the row sums and the rank-one
    # updates.
    ones = matrix(1.0, (max(npts, Nr, n), 1))
    c = Y**2 * ones[:n]
    blas.scal(1.0 / sigma, c)

    # Kij := Kij - 0.5 * (ci + aj).
    # Presumably a[sv][j] = ||xj||^2 / sigma, in which case
    # Kij = -||yi - xj||^2 / (2*sigma) -- confirm against where a is
    # built.
    blas.ger(c, ones, K, alpha=-0.5)
    blas.ger(ones, a[sv], K, alpha=-0.5)

    scores = exp(K) * zr + b

    if not soft:
        return matrix([2 * (val > 0.0) - 1 for val in scores])
    return scores
def classifier2(Y, soft=False):
    """
    Evaluate the classifier built from the completed tanh kernel on the
    rows of Y.

    X, sigma, theta, width, N, L11, Lc, z and b are taken from the
    enclosing scope.  Returns the decision values when soft is true,
    otherwise a matrix with entries +1 / -1.
    """
    npts = Y.size[0]

    # Leading 'width' rows of X*Y'/sigma - theta (theta is preloaded and
    # gemm is called with beta = -1).
    kern = matrix(theta, (width, npts))
    blas.gemm(X, Y, kern, transB='T', alpha=1.0 / sigma, beta=-1.0,
              m=width)

    # tanh evaluated as (e^u - e^-u) / (e^u + e^-u).
    kern = exp(kern)
    kern = div(kern - kern**-1, kern + kern**-1)

    # Complete the kernel block: solve with L11, pad with zero rows up
    # to N rows, then apply the two triangular solves with Lc.
    lapack.potrs(L11, kern)
    kern = matrix([kern, matrix(0., (N - width, npts))], (N, npts))
    chompack.trsm(Lc, kern, trans='N')
    chompack.trsm(Lc, kern, trans='T')

    # scores := kern' * z + b
    scores = matrix(b, (npts, 1))
    blas.gemv(kern, z, scores, trans='T', beta=1.0)

    if not soft:
        return matrix([2 * (val > 0.0) - 1 for val in scores])
    return scores
def f(x, y, z):
    """
    Solve

        C * ux + G' * uzl - 2*A'(uzs21) = bx
                                -uzs11  = bX1
                                -uzs22  = bX2
                     G * ux - D^2 * uzl = bzl

        [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]
        [                ] - T * [              ] * T = bzs.
        [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

    On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
    On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].
    The argument y is not used.

    Define X = uzs21, Z = T * uzs * T:

        C * ux + G' * uzl - 2*A'(X) = bx

            [ 0  X' ]               [ bX1 0   ]
        T * [       ] * T - Z = T * [         ] * T
            [ X  0  ]               [ 0   bX2 ]

        G * ux - D^2 * uzl = bzl

        [ -uX1   -A(ux)' ]   [ Z11 Z21' ]
        [                ] - [          ] = bzs
        [ -A(ux) -uX2    ]   [ Z21 Z22  ]

    Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ].

    We use the congruence transformation

        [ V1  0   ] [ T11 T21' ] [ V1' 0  ]   [ I  S' ]
        [         ] [          ] [        ] = [       ]
        [ 0   V2' ] [ T21 T22  ] [ 0   V2 ]   [ S  I  ]

    and the factorization X + S * X' * S = L( L'(X) ) to write this as

        C * ux + G' * uzl - 2*A'(X) = bx
        L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX
        G * ux - D^2 * uzl = bzl

        [ -uX1   -A(ux)' ]   [ Z11 Z21' ]
        [                ] - [          ] = bzs,
        [ -A(ux) -uX2    ]   [ Z21 Z22  ]

    or

        C * ux + Gs' * uuzl - 2*As'(XX) = bx
                              XX - ZZ21 = bX
                        Gs * ux - uuzl  = D^-1 * bzl
                        -As(ux) - ZZ21  = bbzs_21
                            -uX1 - Z11  = bzs_11
                            -uX2 - Z22  = bzs_22

    if we introduce scaled variables

        uuzl = D * uzl
          XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1)
        ZZ21 = L^-1(V2' * Z21 * V1')

    and define

        bbzs_21 = L^-1(V2' * bzs_21 * V1')

                                [ bX1  0   ]
             bX = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                [ 0    bX2 ]

    Eliminating Z21 gives

        C * ux + Gs' * uuzl - 2*As'(XX) = bx
                        Gs * ux - uuzl  = D^-1 * bzl
                          -As(ux) - XX  = bbzs_21 - bX
                            -uX1 - Z11  = bzs_11
                            -uX2 - Z22  = bzs_22

    and eliminating uuzl and XX gives

        H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
        Gs * ux - uuzl = D^-1 * bzl
          -As(ux) - XX = bbzs_21 - bX
            -uX1 - Z11 = bzs_11
            -uX2 - Z22 = bzs_22.

    In summary, we can use the following algorithm:

    1. bXX := bX - bbzs21

                               [ bX1 0   ]
           = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                               [ 0   bX2 ]

    2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

    3. From ux, compute

           uuzl = Gs*ux - D^-1 * bzl and
           X = V2 * L^-T(-As(ux) + bXX) * V1.

    4. Return ux, uuzl,

           rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r

       and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22.
    """
    # zs occupies z[m:] as a (p+q) x (p+q) matrix in column major order.
    nd = p + q                  # order and leading dimension of zs and r
    off22 = m + (nd + 1) * q    # offset in z of the (2,2) block of zs

    # Save bzs_11, bzs_22, bzs_21.
    lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=nd, offsetA=m)
    lapack.lacpy(z, bz21, m=p, n=q, ldA=nd, offsetA=m + q)
    lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=nd, offsetA=off22)

    # zl := D^-1 * zl
    #     = D^-1 * bzl
    blas.tbmv(W['di'], z, n=m, k=0, ldA=1)

    # zs := r' * [ bX1, 0; 0, bX2 ] * r.

    # zs := [ bX1, 0; 0, bX2 ]
    blas.scal(0.0, z, offset=m)
    lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=nd, offsetB=m)
    lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=nd, offsetB=off22)

    # Halve the diagonal of zs so that tril(zs) + tril(zs)' = zs.
    blas.scal(0.5, z, inc=nd + 1, offset=m)

    # a := tril(zs)*r
    blas.copy(r, a)
    blas.trmm(z, a, side='L', m=nd, n=nd, ldA=nd, ldB=nd, offsetA=m)

    # zs := a'*r + r'*a
    blas.syr2k(r, a, z, trans='T', n=nd, k=nd, ldB=nd, ldC=nd, offsetC=m)

    # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
    #
    #                             [ bX1 0   ]
    #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
    #                             [ 0   bX2 ]

    # a = [ r21 r22 ] * zs
    #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
    #   = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r
    #
    # zs starts at offset m in z, so the symmetric operand must be read
    # with offsetA=m; without it the wrong entries are used when m > 0.
    blas.symm(z, r, a, side='R', m=p, n=nd, ldA=nd, ldC=nd, offsetA=m,
              offsetB=q)

    # bz21 := -bz21 + a * [ r11, r12 ]'
    #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
    blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=nd, beta=-1.0, ldA=nd,
              ldC=p)

    # bz21 := V2' * bz21 * V1'
    #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
    blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p)
    blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p)

    # bz21[:] := D * (I-P) * bz21[:]
    #          = L^-1 * bz21[:]
    #          = bXX[:]
    blas.copy(bz21, tmp)
    base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0)
    base.gemv(D, tmp, bz21)

    # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

    # x[0] := x[0] + Gs'*zl + 2*As'(bz21)
    #       = bx + G' * D^-1 * bzl + 2 * As'(bXX)
    blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0)
    blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0)

    # x[0] := H \ x[0]
    #       = ux
    lapack.potrs(H, x[0])

    # uuzl = Gs*ux - D^-1 * bzl
    blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0)

    # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
    #       = X
    blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0)
    blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1)
    blas.copy(bz21, tmp)
    base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T')
    blas.gemm(V2, bz21, tmp)
    blas.gemm(tmp, V1, bz21)

    # zs := -zs + r' * [ 0, X'; X, 0 ] * r
    #     = r' * [ -bX1, X'; X, -bX2 ] * r.

    # a := bz21 * [ r11, r12 ]
    #    = X * [ r11, r12 ]
    blas.gemm(bz21, r, a, m=p, n=nd, k=q, ldA=p, ldC=nd)

    # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
    #    = rti' * uzs * rti
    blas.syr2k(r, a, z, trans='T', beta=-1.0, n=nd, k=p, offsetA=q,
               offsetC=m, ldB=nd, ldC=nd)

    # uX1 = -Z11 - bzs_11
    #     = -(r*zs*r')_11 - bzs_11
    # uX2 = -Z22 - bzs_22
    #     = -(r*zs*r')_22 - bzs_22
    blas.copy(bz11, x[1])
    blas.copy(bz22, x[2])

    # Halve the diagonal of zs (undone by the final scal below).
    blas.scal(0.5, z, inc=nd + 1, offset=m)

    # a := r*tril(zs)
    blas.copy(r, a)
    blas.trmm(z, a, side='R', m=nd, n=nd, ldA=nd, ldB=nd, offsetA=m)

    # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
    #       = -bzs_11 - (r*zs*r')_11
    blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0)

    # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
    #       = -bzs_22 - (r*zs*r')_22
    blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q,
               offsetB=q)

    # Restore the diagonal of zs (scale it back by 2).
    blas.scal(2.0, z, inc=nd + 1, offset=m)
def F(W):
    """
    Create a solver for the linear equations

        C * ux + G' * uzl - 2*A'(uzs21) = bx
                                -uzs11  = bX1
                                -uzs22  = bX2
                    G * ux - Dl^2 * uzl = bzl

        [ -uX1   -A(ux)' ]          [ uzs11 uzs21' ]
        [                ] - r*r' * [              ] * r*r' = bzs
        [ -A(ux) -uX2    ]          [ uzs21 uzs22  ]

    where Dl = diag(W['l']), r = W['r'][0].

    On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
    On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ].

    1. Compute matrices V1, V2 such that (with T = r*r')

           [ V1  0   ] [ T11 T21' ] [ V1' 0  ]   [ I  S' ]
           [         ] [          ] [        ] = [       ]
           [ 0   V2' ] [ T21 T22  ] [ 0   V2 ]   [ S  I  ]

       and S = [ diag(s); 0 ], s a positive q-vector.

    2. Factor the mapping X -> X + S * X' * S:

           X + S * X' * S = L( L'( X )).

    3. Compute scaled mappings: a matrix As with as its columns the
       coefficients of the scaled mapping

           L^-1( V2' * A() * V1' )

       and the matrix Gs = Dl^-1 * G.

    4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As.

    All work arrays (Q1, Q2, L1, L2, tau1, tau2, T21, s, SS, V1, V2, P,
    D, DV, As, As2, Gs, H, a, tmp, bz11, bz21, bz22), the index sets
    and the problem data (A, G, C, m, n, p, q) come from the enclosing
    scope and are overwritten in place.
    """
    # 1. Compute V1, V2, s.

    r = W['r'][0]

    # LQ factorization R[:q, :] = L1 * Q1.
    lapack.lacpy(r, Q1, m=q)
    lapack.gelqf(Q1, tau1)
    lapack.lacpy(Q1, L1, n=q, uplo='L')
    lapack.orglq(Q1, tau1)

    # LQ factorization R[q:, :] = L2 * Q2.
    lapack.lacpy(r, Q2, m=p, offsetA=q)
    lapack.gelqf(Q2, tau2)
    lapack.lacpy(Q2, L2, n=p, uplo='L')
    lapack.orglq(Q2, tau2)

    # V2, V1, s are computed from an SVD: if
    #
    #     Q2 * Q1' = U * diag(s) * V',
    #
    # then V1 = V' * L1^-1 and V2 = L2^-T * U.

    # T21 = Q2 * Q1.T
    blas.gemm(Q2, Q1, T21, transB='T')

    # SVD T21 = U * diag(s) * V'.  Store U in V2 and V' in V1.
    lapack.gesvd(T21, s, jobu='A', jobvt='A', U=V2, Vt=V1)

    # TODO (from the original): form Q2 * Q1' without extracting Q1 and
    # store T21 in Q2; this requires lapack.ormlq or lapack.unmlq.

    # V2 = L2^-T * U
    blas.trsm(L2, V2, transA='T')

    # V1 = V' * L1^-1
    blas.trsm(L1, V1, side='R')

    # 2. Factorization X + S * X' * S = L( L'( X )).
    #
    # The factor L is stored as a diagonal matrix D and a sparse lower
    # triangular matrix P, such that
    #
    #     L(X)[:] = D**-1 * (I + P) * X[:]
    #     L^-1(X)[:] = D * (I - P) * X[:].

    # SS is q x q with SS[i,j] = si*sj.
    blas.scal(0.0, SS)
    blas.syr(s, SS)

    # For a p x q matrix X, P*X[:] is Y[:] where
    #
    #     Yij = si * sj * Xji  if i < j
    #         = 0              otherwise.
    #
    P.V = SS[Itril2]

    # For a p x q matrix X, D*X[:] is Y[:] where
    #
    #     Yij = Xij / sqrt( 1 - si^2 * sj^2 )  if i < j
    #         = Xii / sqrt( 1 + si^2 )         if i = j
    #         = Xij                            otherwise.
    #
    DV[Idiag] = sqrt(1.0 + SS[::q + 1])
    DV[Itriu] = sqrt(1.0 - SS[Itril3]**2)
    D.V = DV**-1

    # 3. Scaled linear mappings

    # Ask := V2' * Ask * V1'
    blas.scal(0.0, As)
    base.axpy(A, As)
    for i in range(n):
        col = i * p * q     # offset of the i-th p x q coefficient block
        # tmp := V2' * As[i, :]
        blas.gemm(V2, As, tmp, transA='T', m=p, n=q, k=p, ldB=p,
                  offsetB=col)
        # As[:,i] := tmp * V1'
        blas.gemm(tmp, V1, As, transB='T', m=p, n=q, k=q, ldC=p,
                  offsetC=col)

    # As := D * (I - P) * As
    #     = L^-1 * As.
    blas.copy(As, As2)
    base.gemm(P, As, As2, alpha=-1.0, beta=1.0)
    base.gemm(D, As2, As)

    # Gs := Dl^-1 * G
    blas.scal(0.0, Gs)
    base.axpy(G, Gs)
    for k in range(n):
        blas.tbmv(W['di'], Gs, n=m, k=0, ldA=1, offsetx=k * m)

    # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As.
    blas.syrk(As, H, trans='T', alpha=2.0)
    blas.syrk(Gs, H, trans='T', beta=1.0)
    base.axpy(C, H)
    lapack.potrf(H)

    def f(x, y, z):
        """
        Solve

            C * ux + G' * uzl - 2*A'(uzs21) = bx
                                    -uzs11  = bX1
                                    -uzs22  = bX2
                         G * ux - D^2 * uzl = bzl

            [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]
            [                ] - T * [              ] * T = bzs.
            [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

        On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
        On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].
        The argument y is not used.

        Define X = uzs21, Z = T * uzs * T:

            C * ux + G' * uzl - 2*A'(X) = bx

                [ 0  X' ]               [ bX1 0   ]
            T * [       ] * T - Z = T * [         ] * T
                [ X  0  ]               [ 0   bX2 ]

            G * ux - D^2 * uzl = bzl

            [ -uX1   -A(ux)' ]   [ Z11 Z21' ]
            [                ] - [          ] = bzs
            [ -A(ux) -uX2    ]   [ Z21 Z22  ]

        Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ].

        We use the congruence transformation

            [ V1  0   ] [ T11 T21' ] [ V1' 0  ]   [ I  S' ]
            [         ] [          ] [        ] = [       ]
            [ 0   V2' ] [ T21 T22  ] [ 0   V2 ]   [ S  I  ]

        and the factorization X + S * X' * S = L( L'(X) ) to write this
        as

            C * ux + G' * uzl - 2*A'(X) = bx
            L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX
            G * ux - D^2 * uzl = bzl

            [ -uX1   -A(ux)' ]   [ Z11 Z21' ]
            [                ] - [          ] = bzs,
            [ -A(ux) -uX2    ]   [ Z21 Z22  ]

        or

            C * ux + Gs' * uuzl - 2*As'(XX) = bx
                                  XX - ZZ21 = bX
                            Gs * ux - uuzl  = D^-1 * bzl
                            -As(ux) - ZZ21  = bbzs_21
                                -uX1 - Z11  = bzs_11
                                -uX2 - Z22  = bzs_22

        if we introduce scaled variables

            uuzl = D * uzl
              XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1)
            ZZ21 = L^-1(V2' * Z21 * V1')

        and define

            bbzs_21 = L^-1(V2' * bzs_21 * V1')

                                    [ bX1  0   ]
                 bX = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                    [ 0    bX2 ]

        Eliminating Z21 gives

            C * ux + Gs' * uuzl - 2*As'(XX) = bx
                            Gs * ux - uuzl  = D^-1 * bzl
                              -As(ux) - XX  = bbzs_21 - bX
                                -uX1 - Z11  = bzs_11
                                -uX2 - Z22  = bzs_22

        and eliminating uuzl and XX gives

            H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
            Gs * ux - uuzl = D^-1 * bzl
              -As(ux) - XX = bbzs_21 - bX
                -uX1 - Z11 = bzs_11
                -uX2 - Z22 = bzs_22.

        In summary, we can use the following algorithm:

        1. bXX := bX - bbzs21

                                   [ bX1 0   ]
               = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                                   [ 0   bX2 ]

        2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        3. From ux, compute

               uuzl = Gs*ux - D^-1 * bzl and
               X = V2 * L^-T(-As(ux) + bXX) * V1.

        4. Return ux, uuzl,

               rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r

           and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22.
        """
        # zs occupies z[m:] as a (p+q) x (p+q) column major matrix.
        nd = p + q                  # order / leading dimension of zs, r
        off22 = m + (nd + 1) * q    # offset in z of the (2,2) block

        # Save bzs_11, bzs_22, bzs_21.
        lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=nd, offsetA=m)
        lapack.lacpy(z, bz21, m=p, n=q, ldA=nd, offsetA=m + q)
        lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=nd, offsetA=off22)

        # zl := D^-1 * zl
        #     = D^-1 * bzl
        blas.tbmv(W['di'], z, n=m, k=0, ldA=1)

        # zs := r' * [ bX1, 0; 0, bX2 ] * r.

        # zs := [ bX1, 0; 0, bX2 ]
        blas.scal(0.0, z, offset=m)
        lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=nd, offsetB=m)
        lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=nd, offsetB=off22)

        # Halve the diagonal of zs so that tril(zs) + tril(zs)' = zs.
        blas.scal(0.5, z, inc=nd + 1, offset=m)

        # a := tril(zs)*r
        blas.copy(r, a)
        blas.trmm(z, a, side='L', m=nd, n=nd, ldA=nd, ldB=nd, offsetA=m)

        # zs := a'*r + r'*a
        blas.syr2k(r, a, z, trans='T', n=nd, k=nd, ldB=nd, ldC=nd,
                   offsetC=m)

        # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
        #
        #                             [ bX1 0   ]
        #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
        #                             [ 0   bX2 ]

        # a = [ r21 r22 ] * zs
        #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
        #   = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r
        #
        # zs starts at offset m in z, so the symmetric operand must be
        # read with offsetA=m; without it the wrong entries are used
        # when m > 0.
        blas.symm(z, r, a, side='R', m=p, n=nd, ldA=nd, ldC=nd,
                  offsetA=m, offsetB=q)

        # bz21 := -bz21 + a * [ r11, r12 ]'
        #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
        blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=nd, beta=-1.0,
                  ldA=nd, ldC=p)

        # bz21 := V2' * bz21 * V1'
        #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
        blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p)
        blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p)

        # bz21[:] := D * (I-P) * bz21[:]
        #          = L^-1 * bz21[:]
        #          = bXX[:]
        blas.copy(bz21, tmp)
        base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0)
        base.gemv(D, tmp, bz21)

        # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        # x[0] := x[0] + Gs'*zl + 2*As'(bz21)
        #       = bx + G' * D^-1 * bzl + 2 * As'(bXX)
        blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0)
        blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0)

        # x[0] := H \ x[0]
        #       = ux
        lapack.potrs(H, x[0])

        # uuzl = Gs*ux - D^-1 * bzl
        blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0)

        # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
        #       = X
        blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0)
        blas.tbsv(DV, bz21, n=p * q, k=0, ldA=1)
        blas.copy(bz21, tmp)
        base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T')
        blas.gemm(V2, bz21, tmp)
        blas.gemm(tmp, V1, bz21)

        # zs := -zs + r' * [ 0, X'; X, 0 ] * r
        #     = r' * [ -bX1, X'; X, -bX2 ] * r.

        # a := bz21 * [ r11, r12 ]
        #    = X * [ r11, r12 ]
        blas.gemm(bz21, r, a, m=p, n=nd, k=q, ldA=p, ldC=nd)

        # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
        #    = rti' * uzs * rti
        blas.syr2k(r, a, z, trans='T', beta=-1.0, n=nd, k=p, offsetA=q,
                   offsetC=m, ldB=nd, ldC=nd)

        # uX1 = -Z11 - bzs_11
        #     = -(r*zs*r')_11 - bzs_11
        # uX2 = -Z22 - bzs_22
        #     = -(r*zs*r')_22 - bzs_22
        blas.copy(bz11, x[1])
        blas.copy(bz22, x[2])

        # Halve the diagonal of zs (undone by the final scal below).
        blas.scal(0.5, z, inc=nd + 1, offset=m)

        # a := r*tril(zs)
        blas.copy(r, a)
        blas.trmm(z, a, side='R', m=nd, n=nd, ldA=nd, ldB=nd, offsetA=m)

        # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
        #       = -bzs_11 - (r*zs*r')_11
        blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0)

        # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
        #       = -bzs_22 - (r*zs*r')_22
        blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0, offsetA=q,
                   offsetB=q)

        # Restore the diagonal of zs (scale it back by 2).
        blas.scal(2.0, z, inc=nd + 1, offset=m)

    return f
def sysid(y, u, vsig, svth=None):
    """
    System identification using the subspace method and nuclear norm
    optimization.  Estimate a linear time-invariant state-space model
    given inputs and outputs.  The algorithm is described in [1].

    INPUT
    y       'd' matrix of size (p, N).  y are the measured outputs, p is
            the number of outputs, and N is the number of data points
            measured.

    u       'd' matrix of size (m, N).  u are the inputs, m is the
            number of inputs, and N is the number of data points.

    vsig    a weighting parameter in the nuclear norm optimization, its
            value is approximately the 1-sigma output noise level

    svth    an optional parameter, if specified, the model order is
            determined as the number of singular values greater than
            svth times the maximum singular value.  The default value
            is 1E-3.  The model order is capped at 10.

    OUTPUT
    sol     a dictionary with the following words
            -- 'A', 'B', 'C', 'D' are the state-space matrices
            -- 'svN', the original singular values of the Hankel matrix
            -- 'sv', the optimized singular values of the Hankel matrix
            -- 'x0', the initial state x(0)
            -- 'n', the model order
            -- 'Aerr', the 2-norm of the least-squares residual left
               when estimating A
            -- 'xxerr', the 2-norm of the least-squares residual left
               when estimating x0, D and B

    RAISES
    ValueError if y and u do not have the same number of columns.

    [1] Zhang Liu and Lieven Vandenberghe. "Interior-point method for
        nuclear norm approximation with application to system
        identification."
    """
    m, N, p = u.size[0], u.size[1], y.size[0]
    if y.size[1] != N:
        raise ValueError("y and u must have the same length")

    # Y = G*X + H*U + V, Y has size a x b, U has size c x b, Un has b x d
    r = min(int(30 / p), int((N + 1.0) / (p + m + 1) + 1.0))
    a = r * p
    c = r * m
    b = N - r + 1
    d = b - c

    # construct Hankel matrix Y
    Y = Hankel(y, r, b, p=p, q=1)

    # construct Hankel matrix U
    U = Hankel(u, r, b, p=m, q=1)

    # compute Un = null(U) and YUn = Y*Un
    Vt = matrix(0.0, (b, b))
    Stemp = matrix(0.0, (c, 1))
    Un = matrix(0.0, (b, d))
    YUn = matrix(0.0, (a, d))
    lapack.gesvd(U, Stemp, jobvt='A', Vt=Vt)
    Un[:, :] = Vt.T[:, c:]
    blas.gemm(Y, Un, YUn)

    # compute original singular values
    svN = matrix(0.0, (min(a, d), 1))
    lapack.gesvd(YUn, svN)

    # variable, [y(1);...;y(N)]
    # form the coefficient matrices for the nuclear norm optimization
    # minimize | Yh * Un |_* + alpha * | y - yh |_F
    AA = Hankel_basis(r, b, p=p, q=1)
    A = matrix(0.0, (a * d, p * N))
    temp = spmatrix([], [], [], (a, b), 'd')
    temp2 = matrix(0.0, (a, d))
    for ii in range(p * N):
        temp[:] = AA[:, ii]
        base.gemm(temp, Un, temp2)
        A[:, ii] = temp2[:]
    B = matrix(0.0, (a, d))

    # flip the matrix if columns is more than rows
    if a < d:
        Itrans = [i + j * a for i in range(a) for j in range(d)]
        B[:] = B[Itrans]
        B.size = (d, a)
        for ii in range(p * N):
            A[:, ii] = A[Itrans, ii]

    # regularized term
    x0 = y[:]
    Qd = matrix(2.0 * svN[0] / p / N / (vsig**2), (p * N, 1))

    # solve the nuclear norm optimization
    sol = nrmapp(A, B, C=base.spdiag(Qd), d=-base.mul(x0, Qd))
    x = sol['x']

    # construct YhUn and take the svd
    YhUn = matrix(B)
    blas.gemv(A, x, YhUn, beta=1.0)
    if a < d:
        YhUn = YhUn.T
    Uh = matrix(0.0, (a, d))
    sv = matrix(0.0, (d, 1))
    lapack.gesvd(YhUn, sv, jobu='S', U=Uh)

    # determine model order
    if svth is None:
        svth = 1E-3
    svthn = sv[0] * svth
    n = 1
    # The bounds are tested before sv[n] is read, so the loop can never
    # index past the end of sv.
    while n < 10 and n < sv.size[0] and sv[n] >= svthn:
        n = n + 1

    # estimate A, C
    Uhn = Uh[:, :n]
    for ii in range(n):
        blas.scal(sv[ii], Uhn, n=a, offset=ii * a)
    syseC = Uhn[:p, :]
    Als = Uhn[:-p, :]
    Bls = Uhn[p:, :]
    lapack.gels(Als, Bls)
    syseA = Bls[:n, :]
    # gels overwrote Als and Bls; rebuild them to measure the residual
    # Als * A - Bls.
    Als[:, :] = Uhn[:-p, :]
    Bls[:, :] = Uhn[p:, :]
    blas.gemm(Als, syseA, Bls, beta=-1.0)
    Aerr = blas.nrm2(Bls)

    # stabilize A
    Sc = matrix(0.0, (n, n), 'z')
    w = matrix(0.0, (n, 1), 'z')
    Vs = matrix(0.0, (n, n), 'z')

    def inside_unit_circle(eig):
        # select function for gees: eigenvalues with modulus below one
        return (abs(eig) < 1.0)

    Sc[:, :] = syseA
    ns = lapack.gees(Sc, w, Vs, select=inside_unit_circle)
    while ns < n:
        # Eigenvalues w[ns:] were not selected; replace them by their
        # reciprocals and rebuild A from the modified Schur form.
        w[ns:] = w[ns:]**-1
        Sc[::n + 1] = w
        Sc = Vs * Sc * Vs.H
        syseA[:, :] = Sc.real()
        Sc[:, :] = syseA
        ns = lapack.gees(Sc, w, Vs, select=inside_unit_circle)

    # estimate B,D,x0 stored in vector [x0; vec(D); vec(B)]
    F1 = matrix(0.0, (p * N, n))
    F1[:p, :] = syseC
    for ii in range(1, N):
        F1[ii * p:(ii + 1) * p, :] = F1[(ii - 1) * p:ii * p, :] * syseA
    F2 = matrix(0.0, (p * N, p * m))
    ut = u.T
    for ii in range(p):
        F2[ii::p, ii::p] = ut
    F3 = matrix(0.0, (p * N, n * m))
    F3t = matrix(0.0, (p * (N - 1), n * m))
    for ii in range(1, N):
        for jj in range(p):
            for kk in range(n):
                F3t[jj:jj + (N - ii) * p:p, kk::n] = \
                    ut[:N - ii, :] * F1[(ii - 1) * p + jj, kk]
        F3[ii * p:, :] = F3[ii * p:, :] + F3t[:(N - ii) * p, :]

    F = matrix([[F1], [F2], [F3]])
    yls = y[:]
    Sls = matrix(0.0, (F.size[1], 1))
    Uls = matrix(0.0, (F.size[0], F.size[1]))
    Vtls = matrix(0.0, (F.size[1], F.size[1]))
    lapack.gesvd(F, Sls, jobu='S', jobvt='S', U=Uls, Vt=Vtls)
    # numerical rank of F: singular values of at least 1E-6
    Frank = len([ii for ii in range(Sls.size[0]) if Sls[ii] >= 1E-6])
    # truncated-SVD least-squares solution of F * xx = y
    xx = matrix(0.0, (F.size[1], 1))
    xx[:Frank] = Uls.T[:Frank, :] * yls
    xx[:Frank] = base.mul(xx[:Frank], Sls[:Frank]**-1)
    xx[:] = Vtls.T[:, :Frank] * xx[:Frank]
    blas.gemv(F, xx, yls, beta=-1.0)
    xxerr = blas.nrm2(yls)
    x0 = xx[:n]
    syseD = xx[n:n + p * m]
    syseD.size = (p, m)
    syseB = xx[n + p * m:]
    syseB.size = (n, m)

    return {'A': syseA, 'B': syseB, 'C': syseC, 'D': syseD, 'svN': svN,
            'sv': sv, 'x0': x0, 'n': n, 'Aerr': Aerr, 'xxerr': xxerr}
def Aopt_KKT_solver(si2, W):
    r'''
    Construct a solver that solves the KKT equations associated with the
    cone programming for A-optimal:

        / 0   At   Gt   \ / x \   / p \
        | A   0    0    | | y | = | q |
        \ G   0   -Wt W / \ z /   \ s /

    Args:

    si2: symmetric KxK matrix, si2[i,j] = 1/s_{ij}^2
    W:   scaling dictionary; the entries 'd', 'di', 'rti' and 'r' are
         read.

    Returns:

    A function kkt_solver(x, y, z) that overwrites its arguments with
    the solution (z is returned in scaled form).
    '''
    K = si2.size[0]
    # Floor division keeps these sizes integers under true division.
    ntri = K * (K + 1) // 2     # number of pairs (a, b) with a <= b
    blk = (K + 1) * (K + 1)     # entries in one (K+1) x (K+1) block

    ds = W['d']
    dis = W['di']  # dis[i] := 1./ds[i]
    rtis = W['rti']
    ris = W['r']

    di2s = dis**2

    # R_i = r_i^{-t}r_i^{-1}
    Ris = [matrix(0.0, (K + 1, K + 1)) for _ in range(K)]
    for i in range(K):
        blas.gemm(rtis[i], rtis[i], Ris[i], transB='T')

    ddR2 = matrix(0., (ntri, ntri))
    sumdR2_C(Ris, ddR2, K)

    # upper triangular representation si2ab[(a,b)] := si2[a,b]
    si2ab = matrix(0., (ntri, 1))
    pos = 0
    for i in range(K):
        si2ab[pos:pos + (K - i)] = si2[i:, i]
        pos += (K - i)

    si2q = matrix(0., (ntri, ntri))
    blas.syr(si2ab, si2q)

    sRVR = cvxopt.mul(si2q, ddR2)

    # We first solve for K(K+1)/2 n_{ab}, K u_i, 1 y
    nvars = ntri + K  # + 1  We solve y by elimination of n and u.

    Bm = matrix(0.0, (nvars, nvars))

    # The LHS matrix of equations
    #
    # d_{ab}^{-2} n_{ab} + vec(V_{ab})^t . vec( \sum_i R_i* F R_i*)
    # + \sum_i vec(V_{ab})^t . vec( g_i g_i^t) u_i + y
    # = -d_{ab}^{-2}l_{ab} + ( p_{ab} - vec(V_{ab})^t . vec(\sum_i L_i*)
    #

    # Coefficients for n_{ab}
    Bm[:ntri, :ntri] = sRVR
    for row in range(ntri):
        Bm[row, row] += di2s[row]  # d_{ab}^{-2} n_{ab}

    # Coefficients for u_i

    # The LHS of equations
    # g_i^t F g_i + R_{i,K+1,K+1}^2 u_i = pi - L_{i,K+1,K+1}
    dg = matrix(0., (K, ntri))
    g = matrix(0., (K, K))
    for i in range(K):
        g[i, :] = Ris[i][K, :K]
    # dg[:,(a,b)] = g[a] - g[b] if a!=b else g[a]
    pairwise_diff(g, dg, K)
    dg2 = dg**2
    # dg2 := s[(a,b)]^{-2} dg[(a,b)]^2
    for i in range(K):
        dg2[i, :] = cvxopt.mul(si2ab.T, dg2[i, :])

    Bm[ntri:ntri + K, :-K] = dg2

    # Diagonal coefficients for u_i.
    for i in range(K):
        RiKK = Ris[i][K, K]
        Bm[ntri + i, ntri + i] = RiKK**2

    # Compare with the default KKT solver.
    TEST_KKT = False
    if TEST_KKT:
        Bm0 = matrix(0., Bm.size)
        blas.copy(Bm, Bm0)
        G, h, A = Aopt_GhA(si2)
        dims = dict(l=ntri, q=[], s=[K + 1] * K)
        default_solver = misc.kkt_ldl(G, dims, A)(W)

    ipiv = matrix(0, Bm.size)
    lapack.sytrf(Bm, ipiv)
    # TODO: IS THIS A POSITIVE DEFINITE MATRIX?
    # lapack.potrf( Bm)

    # oz := (1, ..., 1, 0, ..., 0)' with K*(K+1)/2 ones and K zeros
    oz = matrix(0., (Bm.size[0], 1))
    oz[:ntri] = 1.

    # iB1 := B^{-1} oz
    iB1 = matrix(oz[:], oz.size)
    lapack.sytrs(Bm, ipiv, iB1)
    # lapack.potrs( Bm, iB1)

    #######
    #
    #  The solver
    #
    #######
    def kkt_solver(x, y, z):
        """Solve the KKT system in place; x, y and z are overwritten."""

        if TEST_KKT:
            x0 = matrix(0., x.size)
            y0 = matrix(0., y.size)
            z0 = matrix(0., z.size)
            x0[:] = x[:]
            y0[:] = y[:]
            z0[:] = z[:]

            # Get default solver solutions.
            xp = matrix(0., x.size)
            yp = matrix(0., y.size)
            zp = matrix(0., z.size)
            xp[:] = x[:]
            yp[:] = y[:]
            zp[:] = z[:]
            default_solver(xp, yp, zp)
            offset = ntri
            for i in range(K):
                symmetrize_matrix(zp, K + 1, offset)
                offset += blk

        # pab = x[:K*(K+1)/2]  # p_{ab}  1<=a<=b<=K
        # pis = x[K*(K+1)/2:]  # \pi_i   1<=i<=K

        # z_{ab} := d_{ab}^{-1} z_{ab}
        # \mat{z}_i = r_i^{-1} \mat{z}_i r_i^{-t}
        misc.scale(z, W, trans='T', inverse='I')

        lz = z[:]
        # l_{ab} := d_{ab}^{-2} z_{ab}
        # \mat{z}_i := r_i^{-t}r_i^{-1} \mat{z}_i r_i^{-t} r_i^{-1}
        misc.scale(lz, W, trans='N', inverse='I')

        # The RHS of equations
        #
        # d_{ab}^{-2}n_{ab} + vec(V_{ab})^t . vec( \sum_i R_i* F R_i*)
        # + \sum_i vec(V_{ab})^t . vec( g_i g_i^t) u_i + y
        # = -d_{ab}^{-2} l_{ab} + ( p_{ab}
        #                           - vec(V_{ab})^t . vec(\sum_i L_i*)
        #

        # Lsum := \sum_i L_i
        Lsum = np.sum(np.array(lz[ntri:]).reshape((K, blk)), axis=0)
        Lsum = matrix(Lsum, (K + 1, K + 1))
        Ls = Lsum[:K, :K]

        x[:ntri] -= lz[:ntri]

        dL = matrix(0., (ntri, 1))
        ab = 0
        for a in range(K):
            dL[ab] = Ls[a, a]
            ab += 1
            for b in range(a + 1, K):
                dL[ab] = Ls[a, a] + Ls[b, b] - 2 * Ls[b, a]
                ab += 1

        x[:ntri] -= cvxopt.mul(si2ab, dL)

        # The RHS of equations
        # g_i^t F g_i + R_{i,K+1,K+1}^2 u_i = pi - L_{i,K+1,K+1}
        # (the slice picks the last entry of every (K+1)x(K+1) block)
        x[ntri:] -= lz[ntri + blk - 1::blk]

        # x := B^{-1} Cv
        lapack.sytrs(Bm, ipiv, x)
        # lapack.potrs( Bm, x)

        # y := (oz'.B^{-1}.Cv[:-1] - y)/(oz'.B^{-1}.oz)
        y[0] = (blas.dotu(oz, x) - y[0]) / blas.dotu(oz, iB1)

        # x := B^{-1} Cv - B^{-1}.oz y
        blas.axpy(iB1, x, -y[0])

        # Solve for -n_{ab} - d_{ab}^2 z_{ab} = l_{ab}
        # We need to return scaled d*z.
        # z := d_{ab} d_{ab}^{-2}(n_{ab} + l_{ab})
        #    = d_{ab}^{-1}n_{ab} + d_{ab}^{-1}l_{ab}
        z[:ntri] += cvxopt.mul(dis, x[:ntri])
        z[:ntri] *= -1.

        # Solve for \mat{z}_i = -R_i (\mat{l}_i + diag(F, u_i)) R_i
        #                     = -L_i - R_i diag(F, u_i) R_i
        # We return
        # r_i^t \mat{z}_i r_i = -r_i^{-1} (\mat{l}_i + diag(F, u_i)) r_i^{-t}
        ui = x[-K:]
        nab = tri2symm(x, K)

        F = Fisher_matrix(si2, nab)
        for i in range(K):
            start = ntri + i * blk
            end = start + blk
            Fu = matrix(0.0, (K + 1, K + 1))
            Fu[:K, :K] = F
            Fu[K, K] = ui[i]
            Fu = matrix(Fu, (blk, 1))
            # Fu := -r_i^{-1} diag( F, u_i) r_i^{-t}
            cngrnc(rtis[i], Fu, K + 1, alpha=-1.)
            # Fu := -r_i^{-1} (\mat{l}_i + diag( F, u_i )) r_i^{-t}
            blas.axpy(z[start:end], Fu, alpha=-1.)
            z[start:end] = Fu

        if TEST_KKT:
            offset = ntri
            for i in range(K):
                symmetrize_matrix(z, K + 1, offset)
                offset += blk
            dz = np.max(np.abs(z - zp))
            dx = np.max(np.abs(x - xp))
            dy = np.max(np.abs(y - yp))
            tol = 1e-5
            # print(...) with a single argument prints the same text on
            # Python 2 and Python 3.
            if dx > tol:
                print('dx=')
                print(dx)
                print(x)
                print(xp)
            if dy > tol:
                print('dy=')
                print(dy)
                print(y)
                print(yp)
            if dz > tol:
                print('dz=')
                print(dz)
                print(z)
                print(zp)
            if dx > tol or dy > tol or dz > tol:
                for i, (r, rti) in enumerate(zip(ris, rtis)):
                    print('r[%d]=' % i)
                    print(r)
                    print('rti[%d]=' % i)
                    print(rti)
                    print('rti.T*r=')
                    print(rti.T * r)
                for i, d in enumerate(ds):
                    print('d[%d]=%g' % (i, d))
                print('x0, y0, z0=')
                print(x0)
                print(y0)
                print(z0)
                print(Bm0)

    ###
    #  END of kkt_solver.
    ###

    return kkt_solver
def __M2T(L, U, inv=False):
    """
    Update every factor-like object in U in place, one supernode at a
    time, using the blocks stored in L.blkval.

    For each Ut in U the supernodes are visited in reverse postorder.
    For supernode k a dense frontal matrix is assembled from the
    columns of Ut.blkval belonging to k and, when k is not a root, from
    the update matrix popped off the stack.  The frontal matrix is then
    combined with L_{Ak,Nk} and its leading |Nk| columns are written
    back to Ut.blkval.

    inv selects the sign of the update (alpha = -1 when inv is true,
    +1 otherwise) and whether the update matrices for the children are
    pushed before (inv) or after (not inv) the frontal matrix is
    modified.

    Returns None.
    """
    symb = L.symb
    snpost = symb.snpost
    snptr = symb.snptr
    chptr = symb.chptr
    chidx = symb.chidx
    relptr = symb.relptr
    relidx = symb.relidx
    blkptr = symb.blkptr

    pending = []
    alpha = -1.0 if inv else 1.0

    def push_child_updates(front, k):
        # One update matrix per child of supernode k (none if k is a
        # leaf).
        for ii in range(chptr[k], chptr[k + 1]):
            pending.append(
                frontal_get_update(front, relidx, relptr, chidx[ii]))

    for Ut in U:
        for k in reversed(list(snpost)):

            nn = snptr[k + 1] - snptr[k]    # |Nk|
            na = relptr[k + 1] - relptr[k]  # |Ak|
            nj = na + nn
            base_k = blkptr[k]

            # Allocate the frontal matrix and copy Ut_{Jk,Nk} into its
            # leading columns.
            front = matrix(0.0, (nj, nj))
            lapack.lacpy(Ut.blkval, front, offsetA=base_k, ldA=nj, m=nj,
                         n=nn, uplo='L')

            # Supernode k is not a root: copy Vk to the 2,2 block.
            if na > 0:
                Vk = pending.pop()
                lapack.lacpy(Vk, front, offsetB=nn * (nj + 1), m=na, n=na,
                             uplo='L')

            # Compute T_{Jk,Nk} (stored in the leading columns).

            if inv:
                push_child_updates(front, k)

            if na > 0:
                # F_{Nk,Nk} := F_{Nk,Nk} - alpha*F_{Ak,Nk}'*L_{Ak,Nk}
                blas.gemm(front, L.blkval, front, beta=1.0, alpha=-alpha,
                          m=nn, n=nn, k=na, transA='T',
                          ldA=nj, ldB=nj, ldC=nj,
                          offsetA=nn, offsetB=base_k + nn, offsetC=0)
                # F_{Ak,Nk} := F_{Ak,Nk} - alpha*F_{Ak,Ak}*L_{Ak,Nk}
                blas.symm(front, L.blkval, front, side='L', beta=1.0,
                          alpha=-alpha, m=na, n=nn,
                          ldA=nj, ldB=nj, ldC=nj,
                          offsetA=(nj + 1) * nn, offsetB=base_k + nn,
                          offsetC=nn)
                # F_{Nk,Nk} := F_{Nk,Nk} - alpha*L_{Ak,Nk}'*F_{Ak,Nk}
                blas.gemm(L.blkval, front, front, beta=1.0, alpha=-alpha,
                          m=nn, n=nn, k=na, transA='T',
                          ldA=nj, ldB=nj, ldC=nj,
                          offsetA=base_k + nn, offsetB=nn, offsetC=0)

            # Copy the leading Nk columns of the frontal matrix to Ut.
            lapack.lacpy(front, Ut.blkval, offsetB=base_k, ldB=nj, m=nj,
                         n=nn, uplo='L')

            if not inv:
                push_child_updates(front, k)

    return
def f(x, y, z): """ Solve C * ux + G' * uzl - 2*A'(uzs21) = bx -uzs11 = bX1 -uzs22 = bX2 G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ uzs11 uzs21' ] [ ] - T * [ ] * T = bzs. [ -A(ux) -uX2 ] [ uzs21 uzs22 ] On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ]. On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ]. Define X = uzs21, Z = T * uzs * T: C * ux + G' * uzl - 2*A'(X) = bx [ 0 X' ] [ bX1 0 ] T * [ ] * T - Z = T * [ ] * T [ X 0 ] [ 0 bX2 ] G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs [ -A(ux) -uX2 ] [ Z21 Z22 ] Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ]. We use the congruence transformation [ V1 0 ] [ T11 T21' ] [ V1' 0 ] [ I S' ] [ ] [ ] [ ] = [ ] [ 0 V2' ] [ T21 T22 ] [ 0 V2 ] [ S I ] and the factorization X + S * X' * S = L( L'(X) ) to write this as C * ux + G' * uzl - 2*A'(X) = bx L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX G * ux - D^2 * uzl = bzl [ -uX1 -A(ux)' ] [ Z11 Z21' ] [ ] - [ ] = bzs, [ -A(ux) -uX2 ] [ Z21 Z22 ] or C * ux + Gs' * uuzl - 2*As'(XX) = bx XX - ZZ21 = bX Gs * ux - uuzl = D^-1 * bzl -As(ux) - ZZ21 = bbzs_21 -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 if we introduce scaled variables uuzl = D * uzl XX = L'(V2^-1 * X * V1^-1) = L'(V2^-1 * uzs21 * V1^-1) ZZ21 = L^-1(V2' * Z21 * V1') and define bbzs_21 = L^-1(V2' * bzs_21 * V1') [ bX1 0 ] bX = L^-1( V2' * (T * [ ] * T)_21 * V1'). [ 0 bX2 ] Eliminating Z21 gives C * ux + Gs' * uuzl - 2*As'(XX) = bx Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22 and eliminating uuzl and XX gives H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21) Gs * ux - uuzl = D^-1 * bzl -As(ux) - XX = bbzs_21 - bX -uX1 - Z11 = bzs_11 -uX2 - Z22 = bzs_22. In summary, we can use the following algorithm: 1. bXX := bX - bbzs21 [ bX1 0 ] = L^-1( V2' * ((T * [ ] * T)_21 - bzs_21) * V1') [ 0 bX2 ] 2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 3. 
From ux, compute uuzl = Gs*ux - D^-1 * bzl and X = V2 * L^-T(-As(ux) + bXX) * V1. 4. Return ux, uuzl, rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r and uX1 = -Z11 - bzs_11, uX2 = -Z22 - bzs_22. """ # Save bzs_11, bzs_22, bzs_21. lapack.lacpy(z, bz11, uplo = 'L', m = q, n = q, ldA = p+q, offsetA = m) lapack.lacpy(z, bz21, m = p, n = q, ldA = p+q, offsetA = m+q) lapack.lacpy(z, bz22, uplo = 'L', m = p, n = p, ldA = p+q, offsetA = m + (p+q+1)*q) # zl := D^-1 * zl # = D^-1 * bzl blas.tbmv(W['di'], z, n = m, k = 0, ldA = 1) # zs := r' * [ bX1, 0; 0, bX2 ] * r. # zs := [ bX1, 0; 0, bX2 ] blas.scal(0.0, z, offset = m) lapack.lacpy(x[1], z, uplo = 'L', m = q, n = q, ldB = p+q, offsetB = m) lapack.lacpy(x[2], z, uplo = 'L', m = p, n = p, ldB = p+q, offsetB = m + (p+q+1)*q) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := tril(zs)*r blas.copy(r, a) blas.trmm(z, a, side = 'L', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # zs := a'*r + r'*a blas.syr2k(r, a, z, trans = 'T', n = p+q, k = p+q, ldB = p+q, ldC = p+q, offsetC = m) # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1') # # [ bX1 0 ] # = L^-1( V2' * ((T * [ ] * T)_21 - bz21) * V1'). # [ 0 bX2 ] # a = [ r21 r22 ] * z # = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r # = [ T21 T22 ] * [ bX1, 0; 0, bX2 ] * r blas.symm(z, r, a, side = 'R', m = p, n = p+q, ldA = p+q, ldC = p+q, offsetB = q) # bz21 := -bz21 + a * [ r11, r12 ]' # = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21 blas.gemm(a, r, bz21, transB = 'T', m = p, n = q, k = p+q, beta = -1.0, ldA = p+q, ldC = p) # bz21 := V2' * bz21 * V1' # = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1' blas.gemm(V2, bz21, tmp, transA = 'T', m = p, n = q, k = p, ldB = p) blas.gemm(tmp, V1, bz21, transB = 'T', m = p, n = q, k = q, ldC = p) # bz21[:] := D * (I-P) * bz21[:] # = L^-1 * bz21[:] # = bXX[:] blas.copy(bz21, tmp) base.gemv(P, bz21, tmp, alpha = -1.0, beta = 1.0) base.gemv(D, tmp, bz21) # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX). 
# x[0] := x[0] + Gs'*zl + 2*As'(bz21) # = bx + G' * D^-1 * bzl + 2 * As'(bXX) blas.gemv(Gs, z, x[0], trans = 'T', alpha = 1.0, beta = 1.0) blas.gemv(As, bz21, x[0], trans = 'T', alpha = 2.0, beta = 1.0) # x[0] := H \ x[0] # = ux lapack.potrs(H, x[0]) # uuzl = Gs*ux - D^-1 * bzl blas.gemv(Gs, x[0], z, alpha = 1.0, beta = -1.0) # bz21 := V2 * L^-T(-As(ux) + bz21) * V1 # = X blas.gemv(As, x[0], bz21, alpha = -1.0, beta = 1.0) blas.tbsv(DV, bz21, n = p*q, k = 0, ldA = 1) blas.copy(bz21, tmp) base.gemv(P, tmp, bz21, alpha = -1.0, beta = 1.0, trans = 'T') blas.gemm(V2, bz21, tmp) blas.gemm(tmp, V1, bz21) # zs := -zs + r' * [ 0, X'; X, 0 ] * r # = r' * [ -bX1, X'; X, -bX2 ] * r. # a := bz21 * [ r11, r12 ] # = X * [ r11, r12 ] blas.gemm(bz21, r, a, m = p, n = p+q, k = q, ldA = p, ldC = p+q) # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ] # = rti' * uzs * rti blas.syr2k(r, a, z, trans = 'T', beta = -1.0, n = p+q, k = p, offsetA = q, offsetC = m, ldB = p+q, ldC = p+q) # uX1 = -Z11 - bzs_11 # = -(r*zs*r')_11 - bzs_11 # uX2 = -Z22 - bzs_22 # = -(r*zs*r')_22 - bzs_22 blas.copy(bz11, x[1]) blas.copy(bz22, x[2]) # scale diagonal of zs by 1/2 blas.scal(0.5, z, inc = p+q+1, offset = m) # a := r*tril(zs) blas.copy(r, a) blas.trmm(z, a, side = 'R', m = p+q, n = p+q, ldA = p+q, ldB = p+q, offsetA = m) # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]' # = -bzs_11 - (r*zs*r')_11 blas.syr2k(a, r, x[1], n = q, alpha = -1.0, beta = -1.0) # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]' # = -bzs_22 - (r*zs*r')_22 blas.syr2k(a, r, x[2], n = p, alpha = -1.0, beta = -1.0, offsetA = q, offsetB = q) # scale diagonal of zs by 1/2 blas.scal(2.0, z, inc = p+q+1, offset = m)
def sysid(y, u, vsig, svth=None):
    """
    System identification using the subspace method and nuclear norm
    optimization.  Estimate a linear time-invariant state-space model
    given inputs and outputs.  The algorithm is described in [1].

    INPUT
    y       'd' matrix of size (p, N).  y are the measured outputs, p is
            the number of outputs, and N is the number of data points
            measured.

    u       'd' matrix of size (m, N).  u are the inputs, m is the number
            of inputs, and N is the number of data points.

    vsig    a weighting parameter in the nuclear norm optimization, its
            value is approximately the 1-sigma output noise level

    svth    an optional parameter, if specified, the model order is
            determined as the number of singular values greater than svth
            times the maximum singular value.  The default value is 1E-3.
            The model order is at least 1 and at most 10.

    OUTPUT
    sol     a dictionary with the following words
            -- 'A', 'B', 'C', 'D' are the state-space matrices
            -- 'svN', the original singular values of the Hankel matrix
            -- 'sv', the optimized singular values of the Hankel matrix
            -- 'x0', the initial state x(0)
            -- 'n', the model order
            -- 'Aerr', the 2-norm of the least-squares residual left
               after estimating A
            -- 'xxerr', the 2-norm of the least-squares residual left
               after estimating [x0; vec(D); vec(B)]

    Raises ValueError if y and u do not have the same number of columns.

    [1] Zhang Liu and Lieven Vandenberghe. "Interior-point method for
        nuclear norm approximation with application to system
        identification."
    """

    m, N, p = u.size[0], u.size[1], y.size[0]
    if y.size[1] != N:
        raise ValueError("y and u must have the same length")

    # Y = G*X + H*U + V, Y has size a x b, U has size c x b, Un has b x d
    r = min(int(30/p), int((N+1.0)/(p+m+1)+1.0))
    a = r*p
    c = r*m
    b = N-r+1
    d = b-c

    # construct Hankel matrix Y
    Y = Hankel(y, r, b, p=p, q=1)

    # construct Hankel matrix U
    U = Hankel(u, r, b, p=m, q=1)

    # compute Un = null(U) and YUn = Y*Un
    Vt = matrix(0.0, (b, b))
    Stemp = matrix(0.0, (c, 1))
    Un = matrix(0.0, (b, d))
    YUn = matrix(0.0, (a, d))
    lapack.gesvd(U, Stemp, jobvt='A', Vt=Vt)
    Un[:, :] = Vt.T[:, c:]
    blas.gemm(Y, Un, YUn)

    # compute original singular values
    svN = matrix(0.0, (min(a, d), 1))
    lapack.gesvd(YUn, svN)

    # variable, [y(1);...;y(N)]
    # form the coefficient matrices for the nuclear norm optimization
    # minimize | Yh * Un |_* + alpha * | y - yh |_F
    AA = Hankel_basis(r, b, p=p, q=1)
    A = matrix(0.0, (a*d, p*N))
    temp = spmatrix([], [], [], (a, b), 'd')
    temp2 = matrix(0.0, (a, d))
    for ii in range(p*N):
        temp[:] = AA[:, ii]
        base.gemm(temp, Un, temp2)
        A[:, ii] = temp2[:]
    B = matrix(0.0, (a, d))

    # flip the matrix if columns is more than rows
    if a < d:
        Itrans = [i+j*a for i in range(a) for j in range(d)]
        B[:] = B[Itrans]
        B.size = (d, a)
        for ii in range(p*N):
            A[:, ii] = A[Itrans, ii]

    # regularized term
    x0 = y[:]
    Qd = matrix(2.0*svN[0]/p/N/(vsig**2), (p*N, 1))

    # solve the nuclear norm optimization
    sol = nrmapp(A, B, C=base.spdiag(Qd), d=-base.mul(x0, Qd))
    x = sol['x']

    # construct YhUn and take the svd
    YhUn = matrix(B)
    blas.gemv(A, x, YhUn, beta=1.0)
    if a < d:
        YhUn = YhUn.T
    Uh = matrix(0.0, (a, d))
    sv = matrix(0.0, (d, 1))
    lapack.gesvd(YhUn, sv, jobu='S', U=Uh)

    # determine model order
    if svth is None:
        svth = 1E-3
    svthn = sv[0]*svth
    # Check the bounds before indexing: the order is capped at 10 and can
    # never exceed the number of stored singular values.
    max_order = min(10, sv.size[0])
    n = 1
    while n < max_order and sv[n] >= svthn:
        n = n+1

    # estimate A, C
    Uhn = Uh[:, :n]
    for ii in range(n):
        blas.scal(sv[ii], Uhn, n=a, offset=ii*a)
    syseC = Uhn[:p, :]
    Als = Uhn[:-p, :]
    Bls = Uhn[p:, :]
    lapack.gels(Als, Bls)
    syseA = Bls[:n, :]
    # gels overwrote Als and Bls; rebuild them to measure the residual
    Als[:, :] = Uhn[:-p, :]
    Bls[:, :] = Uhn[p:, :]
    blas.gemm(Als, syseA, Bls, beta=-1.0)
    Aerr = blas.nrm2(Bls)

    # stabilize A
    Sc = matrix(0.0, (n, n), 'z')
    w = matrix(0.0, (n, 1), 'z')
    Vs = matrix(0.0, (n, n), 'z')

    def _inside_unit_disk(ev):
        # eigenvalue selector passed to gees
        return (abs(ev) < 1.0)

    Sc[:, :] = syseA
    ns = lapack.gees(Sc, w, Vs, select=_inside_unit_disk)
    while ns < n:
        # invert the eigenvalues that were not selected and rebuild A
        w[ns:] = w[ns:]**-1
        Sc[::n+1] = w
        Sc = Vs*Sc*Vs.H
        syseA[:, :] = Sc.real()
        Sc[:, :] = syseA
        ns = lapack.gees(Sc, w, Vs, select=_inside_unit_disk)

    # estimate B,D,x0 stored in vector [x0; vec(D); vec(B)]
    F1 = matrix(0.0, (p*N, n))
    F1[:p, :] = syseC
    for ii in range(1, N):
        F1[ii*p:(ii+1)*p, :] = F1[(ii-1)*p:ii*p, :]*syseA
    F2 = matrix(0.0, (p*N, p*m))
    ut = u.T
    for ii in range(p):
        F2[ii::p, ii::p] = ut
    F3 = matrix(0.0, (p*N, n*m))
    F3t = matrix(0.0, (p*(N-1), n*m))
    for ii in range(1, N):
        for jj in range(p):
            for kk in range(n):
                F3t[jj:jj+(N-ii)*p:p, kk::n] = \
                    ut[:N-ii, :]*F1[(ii-1)*p+jj, kk]
        F3[ii*p:, :] = F3[ii*p:, :] + F3t[:(N-ii)*p, :]

    # least-squares solve via a truncated SVD of F = [F1 F2 F3]
    F = matrix([[F1], [F2], [F3]])
    yls = y[:]
    Sls = matrix(0.0, (F.size[1], 1))
    Uls = matrix(0.0, (F.size[0], F.size[1]))
    Vtls = matrix(0.0, (F.size[1], F.size[1]))
    lapack.gesvd(F, Sls, jobu='S', jobvt='S', U=Uls, Vt=Vtls)
    Frank = len([ii for ii in range(Sls.size[0]) if Sls[ii] >= 1E-6])
    xx = matrix(0.0, (F.size[1], 1))
    xx[:Frank] = Uls.T[:Frank, :] * yls
    xx[:Frank] = base.mul(xx[:Frank], Sls[:Frank]**-1)
    xx[:] = Vtls.T[:, :Frank]*xx[:Frank]
    blas.gemv(F, xx, yls, beta=-1.0)
    xxerr = blas.nrm2(yls)
    x0 = xx[:n]
    syseD = xx[n:n+p*m]
    syseD.size = (p, m)
    syseB = xx[n+p*m:]
    syseB.size = (n, m)

    return {'A': syseA, 'B': syseB, 'C': syseC, 'D': syseD, 'svN': svN,
            'sv': sv, 'x0': x0, 'n': n, 'Aerr': Aerr, 'xxerr': xxerr}
def F(W):
    """
    Factor the KKT system for the scaling W and return a solver for it.

    W['r'][j] is the scaling matrix of block j.  The function overwrites
    the work arrays sv, U, Vt, Gamma and As and reads N, M, ns, ms, rho
    and A, all taken from the enclosing scope.  It then forms and
    factors the sparse matrix H.  The returned function solve(x, y, z)
    additionally uses the work vectors xp and bz and the mapping Af.
    """

    for j in range(N):

        # SVD R[j] = U[j] * diag(sig[j]) * Vt[j]
        lapack.gesvd(+W['r'][j], sv[j], jobu='A', jobvt='A', U=U[j],
                     Vt=Vt[j])

        # Vt[j] := diag(sig[j])^-1 * Vt[j]
        for k in range(ns[j]):
            blas.tbsv(sv[j], Vt[j], n=ns[j], k=0, ldA=1,
                      offsetx=k * ns[j])

        # Gamma[j] is an ns[j] x ns[j] symmetric matrix
        #
        #  (sig[j] * sig[j]') ./ sqrt(1 + rho * (sig[j] * sig[j]').^2)

        # S = sig[j] * sig[j]'
        S = matrix(0.0, (ns[j], ns[j]))
        blas.syrk(sv[j], S)
        Gamma[j][:] = div(S, sqrt(1.0 + rho * S**2))[:]
        symmetrize(Gamma[j], ns[j])

        # As represents the scaled mapping
        #
        # As(x) = A(u * (Gamma .* x) * u')
        # As'(y) = Gamma .* (u' * A'(y) * u)
        #
        # stored in a similar format as A, except that we use packed
        # storage for the columns of As[i][j].

        for i in range(M):

            if isinstance(A[i][j], (matrix, spmatrix)):

                # As[i][j][:,k] = vec(
                #     (U[j]' * mat( A[i][j][:,k] ) * U[j]) .* Gamma[j])

                copy(A[i][j], As[i][j])
                As[i][j] = matrix(As[i][j])
                for k in range(ms[i]):
                    cngrnc(U[j], As[i][j], trans='T',
                           offsetx=k * (ns[j]**2), n=ns[j])
                    blas.tbmv(Gamma[j], As[i][j], n=ns[j]**2, k=0, ldA=1,
                              offsetx=k * (ns[j]**2))

                # pack As[i][j] in place, one column at a time
                for k in range(As[i][j].size[1]):
                    tmp = +As[i][j][:, k]
                    misc.pack2(tmp, {'l': 0, 'q': [], 's': [ns[j]]})
                    As[i][j][:, k] = tmp

            else:
                # zero block: marked by a float instead of a matrix
                As[i][j] = 0.0

    # H is an m times m block matrix with i, k block
    #
    #      Hik = sum_j As[i,j]' * As[k,j]
    #
    # of size ms[i] x ms[k].  Hik = 0 if As[i,j] or As[k,j]
    # are zero for all j

    H = spmatrix([], [], [], (sum(ms), sum(ms)))
    rowid = 0
    for i in range(M):
        colid = 0
        for k in range(i + 1):
            sparse_block = True
            Hik = matrix(0.0, (ms[i], ms[k]))
            for j in range(N):
                if isinstance(As[i][j], matrix) and \
                        isinstance(As[k][j], matrix):
                    sparse_block = False
                    # Length of a packed column.  Floor division keeps
                    # it an int: the BLAS wrappers reject a float.
                    npk = ns[j] * (ns[j] + 1) // 2
                    # Hik += As[i,j]' * As[k,j]
                    if i == k:
                        blas.syrk(As[i][j], Hik, trans='T', beta=1.0,
                                  k=npk, ldA=ns[j]**2)
                    else:
                        blas.gemm(As[i][j], As[k][j], Hik, transA='T',
                                  beta=1.0, k=npk, ldA=ns[j]**2,
                                  ldB=ns[j]**2)
            if not sparse_block:
                H[rowid:rowid+ms[i], colid:colid+ms[k]] = sparse(Hik)
            colid += ms[k]
        rowid += ms[i]

    HF = cholmod.symbolic(H)
    cholmod.numeric(H, HF)

    def solve(x, y, z):
        """
        Returns solution of

            rho * ux + A'(uy) - r^-T * uz * r^-1 = bx
            A(ux)                                = by
            -ux - r * uz * r'                    = bz.

        On entry, x = bx, y = by, z = bz.
        On exit, x = ux, y = uy, z = uz.
        """

        # bz is a copy of z in the format of x
        blas.copy(z, bz)

        # x := x + rho * bz
        #    = bx + rho * bz
        blas.axpy(bz, x, alpha=rho)

        # x := Gamma .* (u' * x * u)
        #    = Gamma .* (u' * (bx + rho * bz) * u)
        offsetj = 0
        for j in range(N):
            cngrnc(U[j], x, trans='T', offsetx=offsetj, n=ns[j])
            blas.tbmv(Gamma[j], x, n=ns[j]**2, k=0, ldA=1,
                      offsetx=offsetj)
            offsetj += ns[j]**2

        # y := y - As(x)
        #   := by - As( Gamma .* u' * (bx + rho * bz) * u)
        blas.copy(x, xp)
        offsetj = 0
        for j in range(N):
            misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]})
            offsetj += ns[j]**2

        offseti = 0
        for i in range(M):
            offsetj = 0
            for j in range(N):
                if isinstance(As[i][j], matrix):
                    blas.gemv(As[i][j], xp, y, trans='T', alpha=-1.0,
                              beta=1.0, m=ns[j] * (ns[j] + 1) // 2,
                              n=ms[i], ldA=ns[j]**2, offsetx=offsetj,
                              offsety=offseti)
                offsetj += ns[j]**2
            offseti += ms[i]

        # y := -y - A(bz)
        #    = -by - A(bz) + As(Gamma .* (u' * (bx + rho * bz) * u)
        Af(bz, y, alpha=-1.0, beta=-1.0)

        # y := H^-1 * y
        #    = H^-1 ( -by - A(bz) + As(Gamma.* u'*(bx + rho*bz)*u) )
        #    = uy
        cholmod.solve(HF, y)

        # bz = Vt' * vz * Vt
        #    = uz where
        # vz := Gamma .* ( As'(uy) - x )
        #     = Gamma .* ( As'(uy) - Gamma .* (u'*(bx + rho *bz)*u) )
        #     = Gamma.^2 .* ( u' * (A'(uy) - bx - rho * bz) * u ).
        blas.copy(x, xp)
        offsetj = 0
        for j in range(N):
            # xp is -x[j] = -Gamma .* (u' * (bx + rho*bz) * u)
            # in packed storage (the sign is applied after the loop)
            misc.pack2(xp, {'l': offsetj, 'q': [], 's': [ns[j]]})
            offsetj += ns[j]**2
        blas.scal(-1.0, xp)

        offsetj = 0
        for j in range(N):
            # xp += As'(uy)
            offseti = 0
            for i in range(M):
                if isinstance(As[i][j], matrix):
                    blas.gemv(As[i][j], y, xp, alpha=1.0, beta=1.0,
                              m=ns[j] * (ns[j] + 1) // 2, n=ms[i],
                              ldA=ns[j]**2, offsetx=offseti,
                              offsety=offsetj)
                offseti += ms[i]

            # bz[j] is xp unpacked and multiplied with Gamma
            misc.unpack(xp, bz, {'l': 0, 'q': [], 's': [ns[j]]},
                        offsetx=offsetj, offsety=offsetj)
            blas.tbmv(Gamma[j], bz, n=ns[j]**2, k=0, ldA=1,
                      offsetx=offsetj)

            # bz = Vt' * bz * Vt
            #    = uz
            cngrnc(Vt[j], bz, trans='T', offsetx=offsetj, n=ns[j])
            symmetrize(bz, ns[j], offset=offsetj)
            offsetj += ns[j]**2

        # x = -bz - r * uz * r'
        blas.copy(z, x)
        blas.copy(bz, z)
        offsetj = 0
        for j in range(N):
            cngrnc(+W['r'][j], bz, offsetx=offsetj, n=ns[j])
            offsetj += ns[j]**2
        blas.axpy(bz, x)
        blas.scal(-1.0, x)

    return solve
def trmm(L, B, alpha = 1.0, trans = 'N', nrhs = None, offsetB = 0, ldB = None):
    r"""
    Multiplication with sparse triangular matrix. Computes

    .. math::

        B &:= \alpha L B   \text{ if trans is 'N'} \\
        B &:= \alpha L^T B \text{ if trans is 'T'}

    where :math:`L` is a :py:class:`cspmatrix` factor.  :math:`B` is
    overwritten in place and nothing is returned.

    :param L:          :py:class:`cspmatrix` factor
    :param B:          matrix
    :param alpha:      float (default: 1.0)
    :param trans:      'N' or 'T' (default: 'N')
    :param nrhs:       number of right-hand sides (default: number of columns in :math:`B`)
    :param offsetB:    integer (default: 0)
    :param ldB:        leading dimension of :math:`B` (default: number of rows in :math:`B`)
    """

    assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"
    assert isinstance(B, matrix), "B must be a matrix"

    if ldB is None: ldB = B.size[0]
    if nrhs is None: nrhs = B.size[1]
    assert trans in ['N', 'T']

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    snode = L.symb.snode
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr
    blkval = L.blkval

    # identity permutation when the symbolic factorization has none
    p = L.symb.p
    if p is None: p = range(n)

    stack = []

    # Compare by value: `trans is 'N'` only worked when the caller's
    # string happened to be the same interned object as the literal.
    if trans == 'N':

        for k in snpost:

            nn = snptr[k+1]-snptr[k]       # |Nk|
            na = relptr[k+1]-relptr[k]     # |Ak|
            nj = na + nn

            # rows of supernode k (the same for every right-hand side)
            rows = snode[snptr[k]:snptr[k+1]]

            # extract and scale block from rhs
            Uk = matrix(0.0,(nj,nrhs))
            for j in range(nrhs):
                for i,ir in enumerate(rows):
                    Uk[i,j] = alpha*B[offsetB + j*ldB + p[ir]]

            blas.trmm(blkval, Uk, m = nn, n = nrhs, offsetA = blkptr[k], ldA = nj)

            if na > 0:
                # compute new contribution (to be stacked)
                # NOTE(review): this gemm reads Uk[:nn,:] after the trmm
                # above has already overwritten it -- confirm that this
                # order is intended for the way L.blkval stores L_{Ak,Nk}.
                blas.gemm(blkval, Uk, Uk, m = na, n = nrhs, k = nn, alpha = 1.0,
                          offsetA = blkptr[k]+nn, ldA = nj, offsetC = nn)

            # add contributions from children
            for _ in range(chptr[k],chptr[k+1]):
                Ui, i = stack.pop()
                r = relidx[relptr[i]:relptr[i+1]]
                Uk[r,:] += Ui

            # if k is not a root node
            if na > 0: stack.append((Uk[nn:,:],k))

            # copy block to rhs
            for j in range(nrhs):
                for i,ir in enumerate(rows):
                    B[offsetB + j*ldB + p[ir]] = Uk[i,j]

    else: # trans is 'T'

        for k in reversed(list(snpost)):

            nn = snptr[k+1]-snptr[k]       # |Nk|
            na = relptr[k+1]-relptr[k]     # |Ak|
            nj = na + nn

            # rows of supernode k (the same for every right-hand side)
            rows = snode[snptr[k]:snptr[k+1]]

            # extract and scale block from rhs
            Uk = matrix(0.0,(nj,nrhs))
            for j in range(nrhs):
                for i,ir in enumerate(rows):
                    Uk[i,j] = alpha*B[offsetB + j*ldB + p[ir]]

            # if k is not a root node
            if na > 0:
                Uk[nn:,:] = stack.pop()

            # stack contributions for children (taken before Uk is updated)
            for ii in range(chptr[k],chptr[k+1]):
                i = chidx[ii]
                stack.append(Uk[relidx[relptr[i]:relptr[i+1]],:])

            if na > 0:
                blas.gemm(blkval, Uk, Uk, alpha = 1.0, beta = 1.0, m = nn, n = nrhs, k = na,
                          transA = 'T', offsetA = blkptr[k]+nn, ldA = nj, offsetB = nn)

            # scale and copy block to rhs
            blas.trmm(blkval, Uk, transA = 'T', m = nn, n = nrhs, offsetA = blkptr[k], ldA = nj)
            for j in range(nrhs):
                for i,ir in enumerate(rows):
                    B[offsetB + j*ldB + p[ir]] = Uk[i,j]

    return
def mcsvm(X, labels, gamma, kernel='linear', sigma=1.0, degree=1):
    """
    Solves the Crammer and Singer multiclass SVM training problem

        maximize    -(1/2) * tr(U' * Q * U) + tr(E' * U)
        subject to  U <= gamma * E
                    U * 1_m = 0.

    The variable is an (N x m)-matrix U if N is the number of training
    examples and m the number of classes.

    Q is a positive definite matrix of order N with Q[i,j] = K(xi, xj)
    where K is a kernel function and xi is the ith row of X.

    The matrix E is an N x m matrix with E[i,j] = 1 if labels[i] = j
    and E[i,j] = 0 otherwise.

    Input arguments.

        X is a N x n matrix.  The rows are the training vectors.

        labels is a list of integers of length N with values 0, ..., m-1.
        labels[i] is the class of training example i.

        gamma is a positive parameter.

        kernel is a string with values 'linear' or 'poly'.
        'linear': K(u,v) = u'*v.
        'poly': K(u,v) = (u'*v / sigma)**degree.

        sigma is a positive number.

        degree is a positive integer.

    Output.

        Returns a function classifier().  If Y is M x n then classifier(Y)
        returns a list with as its kth element

            argmax { j = 0, ..., m-1 | sum_{i=1}^N U[i,j] * K(xi, yk) }

        where yk' = Y[k, :], xi' = X[i, :], and U is the optimal solution
        of the QP.  Ties are resolved in favour of the largest class index.

    Raises ValueError if kernel is neither 'linear' nor 'poly'.

    Timing information is printed as a side effect.
    """

    N, n = X.size

    m = max(labels) + 1
    E = matrix(0.0, (N, m))
    E[matrix(range(N)) + N * matrix(labels)] = 1.0

    def G(x, y, alpha=1.0, beta=0.0, trans='N'):
        """
        If trans is 'N', x is an N x m matrix, and y is an N*m-vector.

            y := alpha * x[:] + beta * y.

        If trans is 'T', x is an N*m vector, and y is an N x m matrix.

            y[:] := alpha * x + beta * y[:].

        Both cases are the same elementwise operation, so trans is not
        inspected.
        """

        blas.scal(beta, y)
        blas.axpy(x, y, alpha)

    h = matrix(gamma * E, (N * m, 1))
    ones = matrix(1.0, (m, 1))

    def A(x, y, alpha=1.0, beta=0.0, trans='N'):
        """
        If trans is 'N', x is an N x m matrix and y an N-vector.

            y := alpha * x * 1_m + beta y.

        If trans is 'T', x is an N vector and y an N x m matrix.

            y := alpha * x * 1_m' + beta y.
        """

        if trans == 'N':
            blas.gemv(x, ones, y, alpha=alpha, beta=beta)
        else:
            blas.scal(beta, y)
            blas.ger(x, ones, y, alpha=alpha)

    b = matrix(0.0, (N, 1))

    if kernel == 'linear' and N > n:

        def P(x, y, alpha=1.0, beta=0.0):
            """
            x and y are N x m matrices.

            y = alpha * X * X' * x + beta * y.
            """

            z = matrix(0.0, (n, m))
            blas.gemm(X, x, z, transA='T')
            blas.gemm(X, z, y, alpha=alpha, beta=beta)

    else:

        if kernel == 'linear':
            # Q = X * X'
            Q = matrix(0.0, (N, N))
            blas.syrk(X, Q)

        elif kernel == 'poly':
            # Q = (X * X' / sigma) ** degree
            Q = matrix(0.0, (N, N))
            blas.syrk(X, Q, alpha=1.0 / sigma)
            Q = Q**degree

        else:
            raise ValueError("invalid kernel type")

        def P(x, y, alpha=1.0, beta=0.0):
            """
            x and y are N x m matrices.

            y = alpha * Q * x + beta * y.
            """

            blas.symm(Q, x, y, alpha=alpha, beta=beta)

    if kernel == 'linear' and N > n:  # add separate code for n <= N <= m*n

        H = [matrix(0.0, (n, n)) for k in range(m)]
        S = matrix(0.0, (m * n, m * n))
        Xs = matrix(0.0, (N, n))
        wnm = matrix(0.0, (m * n, 1))
        wN = matrix(0.0, (N, 1))
        D = matrix(0.0, (N, 1))

        def kkt(W):
            """
            KKT solver for

                X*X' * ux + uy * 1_m' + mat(uz) = bx
                                       ux * 1_m = by
                           ux - d.^2 .* mat(uz) = mat(bz).

            ux and bx are N x m matrices.
            uy and by are N-vectors.
            uz and bz are N*m-vectors.  mat(uz) is the N x m matrix that
                satisfies mat(uz)[:] = uz.
            d = mat(W['d']) a positive N x m matrix.

            If we eliminate uz from the last equation using

                mat(uz) = (ux - mat(bz)) ./ d.^2

            we get two equations in ux, uy:

                X*X' * ux + ux ./ d.^2 + uy * 1_m' = bx + mat(bz) ./ d.^2
                                          ux * 1_m = by.

            From the 1st equation,

                uxk = (X*X' + Dk^-2)^-1 * (-uy + bxk + Dk^-2 * bzk)
                    = Dk * (I + Xk*Xk')^-1 * Dk * (-uy + bxk + Dk^-2 * bzk)

            for k = 1, ..., m, where Dk = diag(d[:,k]), Xk = Dk * X,
            uxk is column k of ux, and bzk is column k of mat(bz).

            We use the matrix inversion lemma

                ( I + Xk * Xk' )^-1 = I - Xk * (I + Xk' * Xk)^-1 * Xk'
                                    = I - Xk * Hk^-1 * Xk'
                                    = I - Xk * Lk^-T * Lk^-1 * Xk'

            where Hk = I + Xk' * Xk = Lk * Lk' to write this as

                uxk = Dk * (I - Xk * Hk^-1 * Xk') * Dk *
                      (-uy + bxk + Dk^-2 * bzk)
                    = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                      (-uy + bxk + Dk^-2 * bzk).

            Substituting this in the second equation gives an equation
            for uy:

                sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2 ) * uy
                    = -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                      ( bxk + Dk^-2 * bzk ),

            i.e., with D = (sum_k Dk^2)^1/2,  Yk = D^-1 * Dk^2 * X * Lk^-T,

                D * ( I - sum_k Yk * Yk' ) * D * uy
                    = -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                      ( bxk + Dk^-2 *bzk ).

            Another application of the matrix inversion lemma gives

                uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
                     ( -by + sum_k ( Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2 ) *
                     ( bxk + Dk^-2 *bzk ) )

            with S = I - Y' * Y,  Y = [ Y1 ... Ym ].

            Summary:

            1. Compute

                   uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
                        ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2)
                        * ( bxk + Dk^-2 *bzk ) )

            2. For k = 1, ..., m:

                   uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                         (-uy + bxk + Dk^-2 * bzk)

            3. Solve for uz

                   d .* uz = ( ux - mat(bz) ) ./ d.

            Return ux, uy, d .* uz.
            """

            ###
            utime0, stime0 = cputime()
            ###

            d = matrix(W['d'], (N, m))
            dsq = matrix(W['d']**2, (N, m))

            # Factor the matrices
            #
            #     H[k] = I + Xk' * Xk
            #          = I + X' * Dk^2 * X.
            #
            # Dk = diag(d[:,k]).

            for k in range(m):

                # H[k] = I
                blas.scal(0.0, H[k])
                H[k][::n + 1] = 1.0

                # Xs = Dk * X
                #    = diag(d[:,k]]) * X
                blas.copy(X, Xs)
                for j in range(n):
                    blas.tbmv(d, Xs, n=N, k=0, ldA=1, offsetA=k * N,
                              offsetx=j * N)

                # H[k] := H[k] + Xs' * Xs
                #       = I + Xk' * Xk
                blas.syrk(Xs, H[k], trans='T', beta=1.0)

                # Factorization H[k] = Lk * Lk'
                lapack.potrf(H[k])

            ###
            utime, stime = cputime()
            print("Factor Hk's: utime = %.2f, stime = %.2f"
                  % (utime - utime0, stime - stime0))
            utime0, stime0 = cputime()
            ###

            # diag(D) = ( sum_k d[:,k]**2 ) ** 1/2
            #         = ( sum_k Dk^2) ** 1/2.

            blas.gemv(dsq, ones, D)
            D[:] = sqrt(D)

            ###
            utime0, stime0 = cputime()
            ###

            # S = I - Y'* Y is an m x m block matrix.
            # The i,j block of Y' * Y is
            #
            #     Yi' * Yj = Li^-1 * X' * Di^2 * D^-1 * Dj^2 * X * Lj^-T.
            #
            # We compute only the lower triangular blocks in Y'*Y.

            blas.scal(0.0, S)
            for i in range(m):
                for j in range(i + 1):

                    # Xs = Di * Dj * D^-1 * X
                    blas.copy(X, Xs)
                    blas.copy(d, wN, n=N, offsetx=i * N)
                    blas.tbmv(d, wN, n=N, k=0, ldA=1, offsetA=j * N)
                    blas.tbsv(D, wN, n=N, k=0, ldA=1)
                    for k in range(n):
                        blas.tbmv(wN, Xs, n=N, k=0, ldA=1, offsetx=k * N)

                    # block i, j of S is Xs' * Xs (as nonsymmetric matrix
                    # so we get the correct multiple after scaling with
                    # Li, Lj)
                    blas.gemm(Xs, Xs, S, transA='T', ldC=m * n,
                              offsetC=(j * n) * m * n + i * n)

            ###
            utime, stime = cputime()
            print("Form S: utime = %.2f, stime = %.2f"
                  % (utime - utime0, stime - stime0))
            utime0, stime0 = cputime()
            ###

            for i in range(m):

                # multiply block row i of S on the left with Li^-1
                blas.trsm(H[i], S, m=n, n=(i + 1) * n, ldB=m * n,
                          offsetB=i * n)

                # multiply block column i of S on the right with Li^-T
                blas.trsm(H[i], S, side='R', transA='T', m=(m - i) * n,
                          n=n, ldB=m * n, offsetB=i * n * (m * n + 1))

            # S := I - S
            blas.scal(-1.0, S)
            S[::(m * n + 1)] += 1.0

            ###
            utime, stime = cputime()
            print("Form S (2): utime = %.2f, stime = %.2f"
                  % (utime - utime0, stime - stime0))
            utime0, stime0 = cputime()
            ###

            # S = L*L'
            lapack.potrf(S)

            ###
            utime, stime = cputime()
            print("Factor S: utime = %.2f, stime = %.2f"
                  % (utime - utime0, stime - stime0))
            utime0, stime0 = cputime()
            ###

            def f(x, y, z):
                """
                1. Compute

                       uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
                            ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' *
                            Dk^2) * ( bxk + Dk^-2 *bzk ) )

                2. For k = 1, ..., m:

                       uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                             (-uy + bxk + Dk^-2 * bzk)

                3. Solve for uz

                       d .* uz = ( ux - mat(bz) ) ./ d.

                Return ux, uy, d .* uz.
                """

                ###
                utime0, stime0 = cputime()
                ###

                # xk := Dk^2 * xk + zk
                #     = Dk^2 * bxk + bzk
                blas.tbmv(dsq, x, n=N * m, k=0, ldA=1)
                blas.axpy(z, x)

                # y := -y + sum_k ( I - Dk^2 * X * Hk^-1 * X' ) * xk
                #    = -y + x*ones - sum_k Dk^2 * X * Hk^-1 * X' * xk

                # y := -y + x*ones
                blas.gemv(x, ones, y, alpha=1.0, beta=-1.0)

                # wnm = X' * x  (wnm interpreted as an n x m matrix)
                blas.gemm(X, x, wnm, m=n, k=N, n=m, transA='T', ldB=N,
                          ldC=n)

                # wnm[:,k] = Hk \ wnm[:,k] (for wnm as an n x m matrix)
                for k in range(m):
                    lapack.potrs(H[k], wnm, offsetB=k * n)

                for k in range(m):

                    # wN = X * wnm[:,k]
                    blas.gemv(X, wnm, wN, offsetx=n * k)

                    # wN = Dk^2 * wN
                    blas.tbmv(dsq[:, k], wN, n=N, k=0, ldA=1)

                    # y := y - wN
                    blas.axpy(wN, y, -1.0)

                # y = D^-1 * (I + Y * S^-1 * Y') * D^-1 * y
                #
                # Y = [Y1 ... Ym ], Yk = D^-1 * Dk^2 * X * Lk^-T.

                # y := D^-1 * y
                blas.tbsv(D, y, n=N, k=0, ldA=1)

                # wnm =  Y' * y  (interpreted as an Nm vector)
                #     = [ L1^-1 * X' * D1^2 * D^-1 * y;
                #         L2^-1 * X' * D2^2 * D^-1 * y;
                #         ...
                #         Lm^-1 * X' * Dm^2 * D^-1 * y ]

                for k in range(m):

                    # wN = D^-1 * Dk^2 * y
                    blas.copy(y, wN)
                    blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
                    blas.tbsv(D, wN, n=N, k=0, ldA=1)

                    # wnm[:,k] = X' * wN
                    blas.gemv(X, wN, wnm, trans='T', offsety=k * n)

                    # wnm[:,k] = Lk^-1 * wnm[:,k]
                    blas.trsv(H[k], wnm, offsetx=k * n)

                # wnm := S^-1 * wnm  (an mn-vector)
                lapack.potrs(S, wnm)

                # y := y + Y * wnm
                #    = y + D^-1 * [ D1^2 * X * L1^-T ... D2^k * X * Lk^-T]
                #      * wnm

                for k in range(m):

                    # wnm[:,k] = Lk^-T * wnm[:,k]
                    blas.trsv(H[k], wnm, trans='T', offsetx=k * n)

                    # wN = X * wnm[:,k]
                    blas.gemv(X, wnm, wN, offsetx=k * n)

                    # wN = D^-1 * Dk^2 * wN
                    blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
                    blas.tbsv(D, wN, n=N, k=0, ldA=1)

                    # y += wN
                    blas.axpy(wN, y)

                # y := D^-1 * y
                blas.tbsv(D, y, n=N, k=0, ldA=1)

                # For k = 1, ..., m:
                #
                # xk = (I - Dk^2 * X * Hk^-1 * X') * (-Dk^2 * y + xk)

                # x = x - [ D1^2 * y ... Dm^2 * y] (as an N x m matrix)
                for k in range(m):
                    blas.copy(y, wN)
                    blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
                    blas.axpy(wN, x, -1.0, offsety=k * N)

                # wnm  = X' * x (as an n x m matrix)
                blas.gemm(X, x, wnm, transA='T', m=n, n=m, k=N, ldB=N,
                          ldC=n)

                # wnm[:,k] = Hk^-1 * wnm[:,k]
                for k in range(m):
                    lapack.potrs(H[k], wnm, offsetB=n * k)

                for k in range(m):

                    # wN = X * wnm[:,k]
                    blas.gemv(X, wnm, wN, offsetx=k * n)

                    # wN = Dk^2 * wN
                    blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)

                    # x[:,k] := x[:,k] - wN
                    blas.axpy(wN, x, -1.0, n=N, offsety=k * N)

                # z := ( x - z ) ./ d
                blas.axpy(x, z, -1.0)
                blas.scal(-1.0, z)
                blas.tbsv(d, z, n=N * m, k=0, ldA=1)

                ###
                utime, stime = cputime()
                print("Solve: utime = %.2f, stime = %.2f"
                      % (utime - utime0, stime - stime0))
                ###

            return f

    else:

        H = [matrix(0.0, (N, N)) for k in range(m)]
        S = matrix(0.0, (N, N))

        def kkt(W):
            """
            KKT solver for

                Q * ux + uy * 1_m' + mat(uz) = bx
                                    ux * 1_m = by
                        ux - d.^2 .* mat(uz) = mat(bz).

            ux and bx are N x m matrices.
            uy and by are N-vectors.
            uz and bz are N*m-vectors.  mat(uz) is the N x m matrix that
                satisfies mat(uz)[:] = uz.
            d = mat(W['d']) a positive N x m matrix.

            If we eliminate uz from the last equation using

                mat(uz) = (ux - mat(bz)) ./ d.^2

            we get two equations in ux, uy:

                Q * ux + ux ./ d.^2 + uy * 1_m' = bx + mat(bz) ./ d.^2
                                       ux * 1_m = by.

            From the 1st equation

                uxk = -(Q + Dk)^-1 * uy + (Q + Dk)^-1 * (bxk + Dk * bzk)

            where uxk is column k of ux, Dk = diag(d[:,k].^-2), and bzk is
            column k of mat(bz).  Substituting this in the second equation
            gives an equation for uy.

            1. Solve for uy

                   sum_k (Q + Dk)^-1 * uy =
                       sum_k (Q + Dk)^-1 * (bxk + Dk * bzk) - by.

            2. Solve for ux (column by column)

                   Q * ux + ux ./ d.^2 = bx + mat(bz) ./ d.^2 - uy * 1_m'.

            3. Solve for uz

                   mat(uz) = ( ux - mat(bz) ) ./ d.^2.

            Return ux, uy, d .* uz.
            """

            # D = d.^-2
            D = matrix(W['di']**2, (N, m))

            blas.scal(0.0, S)
            for k in range(m):

                # Hk := Q + Dk
                blas.copy(Q, H[k])
                H[k][::N + 1] += D[:, k]

                # Hk := Hk^-1
                #     = (Q + Dk)^-1
                lapack.potrf(H[k])
                lapack.potri(H[k])

                # S := S + Hk
                #    = S + (Q + Dk)^-1
                blas.axpy(H[k], S)

            # Factor S = sum_k (Q + Dk)^-1
            lapack.potrf(S)

            def f(x, y, z):

                # z := mat(z)
                #    = mat(bz)
                z.size = N, m

                # x := x + D .* z
                #    = bx + mat(bz) ./ d.^2
                x += mul(D, z)

                # y := y - sum_k (Q + Dk)^-1 * X[:,k]
                #    = by - sum_k (Q + Dk)^-1 * (bxk + Dk * bzk)
                for k in range(m):
                    blas.symv(H[k], x[:, k], y, alpha=-1.0, beta=1.0)

                # y := S^-1 * y
                #    = -uy
                lapack.potrs(S, y)

                # x[:,k] := H[k] * (x[:,k] + y)
                #         = (Q + Dk)^-1 * (bxk + bzk ./ d.^2 + y)
                #         = ux[:,k]
                w = matrix(0.0, (N, 1))
                for k in range(m):

                    # x[:,k] := x[:,k] + y
                    blas.axpy(y, x, offsety=N * k, n=N)

                    # w := H[k] * x[:,k]
                    #    = (Q + Dk)^-1 * (bxk + bzk ./ d.^2 + y)
                    blas.symv(H[k], x, w, offsetx=N * k)

                    # x[:,k] := w
                    #         = ux[:,k]
                    blas.copy(w, x, offsety=N * k)

                # y := -y
                #    = uy
                blas.scal(-1.0, y)

                # z := (x - z) ./ d
                blas.axpy(x, z, -1.0)
                blas.tbsv(W['d'], z, n=m * N, k=0, ldA=1)
                blas.scal(-1.0, z)
                z.size = N * m, 1

            return f

    utime0, stime0 = cputime()
    # solvers.options['debug'] = True
    # solvers.options['maxiters'] = 1
    solvers.options['refinement'] = 1
    sol = solvers.coneqp(P, -E, G, h, A=A, b=b, kktsolver=kkt,
                         xnewcopy=matrix, xdot=blas.dot, xaxpy=blas.axpy,
                         xscal=blas.scal)
    utime, stime = cputime()
    utime -= utime0
    stime -= stime0
    print("utime = %.2f, stime = %.2f" % (utime, stime))
    U = sol['x']

    def _argmax_rows(scores):
        # Class with the largest score in every row.  max() over
        # (score, class) pairs picks the same pair that sorting in
        # descending order put first, and it also works where zip()
        # returns an iterator without a sort() method.
        return [max(zip(list(scores[i, :]), range(m)))[1]
                for i in range(scores.size[0])]

    if kernel == 'linear':

        # W = X' * U
        W = matrix(0.0, (n, m))
        blas.gemm(X, U, W, transA='T')

        def classifier(Y):
            # return [ argmax of Y[k,:] * W for k in range(M) ]
            return _argmax_rows(Y * W)

    elif kernel == 'poly':

        def classifier(Y):
            M = Y.size[0]

            # K = Y * X' / sigma
            K = matrix(0.0, (M, N))
            blas.gemm(Y, X, K, transB='T', alpha=1.0 / sigma)

            return _argmax_rows(K**degree * U)

    return classifier  #, utime, sol['iterations']
def F(W):
    """
    Create a solver for the linear equations

                          C * ux + G' * uzl - 2*A'(uzs21) = bx
                                                   -uzs11 = bX1
                                                   -uzs22 = bX2
                                      G * ux - Dl^2 * uzl = bzl
        [ -uX1   -A(ux)' ]          [ uzs11 uzs21' ]
        [                ] - r*r' * [              ] * r*r' = bzs
        [ -A(ux) -uX2    ]          [ uzs21 uzs22  ]

    where Dl = diag(W['d']) (applied below through its inverse W['di'])
    and r = W['r'][0].

    On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
    On exit, x = (ux, uX1, uX2) and z = [ Dl*uzl; (r'*uzs*r)[:] ].

    1. Compute matrices V1, V2 such that (with T = r*r')

           [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
           [          ] [           ] [         ] = [       ]
           [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

       and S = [ diag(s); 0 ], s a positive q-vector.

    2. Factor the mapping X -> X + S * X' * S:

           X + S * X' * S = L( L'( X )).

    3. Compute scaled mappings: a matrix As with as its columns the
       coefficients of the scaled mapping

           L^-1( V2' * A() * V1' )

       and the matrix Gs = Dl^-1 * G.

    4. Cholesky factorization of H = C + Gs'*Gs + 2*As'*As.

    The function works entirely in preallocated workspaces and problem
    data that are defined outside of it (Q1, Q2, L1, L2, tau1, tau2, T21,
    s, V1, V2, SS, P, D, DV, As, As2, Gs, H, tmp, a, bz11, bz21, bz22,
    the index sets Idiag, Itriu, Itril2, Itril3, the data A, G, C and
    the dimensions n, m, p, q).

    :param W: scaling dictionary; only W['r'][0] and W['di'] are read.
    :returns: a function f(x, y, z) that solves the system in place.
    """

    # 1. Compute V1, V2, s.

    r = W['r'][0]

    # LQ factorization R[:q, :] = L1 * Q1.
    lapack.lacpy(r, Q1, m=q)
    lapack.gelqf(Q1, tau1)
    lapack.lacpy(Q1, L1, n=q, uplo='L')
    lapack.orglq(Q1, tau1)

    # LQ factorization R[q:, :] = L2 * Q2.
    lapack.lacpy(r, Q2, m=p, offsetA=q)
    lapack.gelqf(Q2, tau2)
    lapack.lacpy(Q2, L2, n=p, uplo='L')
    lapack.orglq(Q2, tau2)

    # V2, V1, s are computed from an SVD: if
    #
    #     Q2 * Q1' = U * diag(s) * V',
    #
    # then V1 = V' * L1^-1 and V2 = L2^-T * U.

    # T21 = Q2 * Q1.T
    blas.gemm(Q2, Q1, T21, transB='T')

    # SVD T21 = U * diag(s) * V'.  Store U in V2 and V' in V1.
    lapack.gesvd(T21, s, jobu='A', jobvt='A', U=V2, Vt=V1)

    # NOTE: Q2 * Q1' could be formed without extracting Q1 (storing T21
    # in Q2); that would require lapack.ormlq or lapack.unmlq.

    # V2 = L2^-T * U
    blas.trsm(L2, V2, transA='T')

    # V1 = V' * L1^-1
    blas.trsm(L1, V1, side='R')

    # 2. Factorization X + S * X' * S = L( L'( X )).
    #
    # The factor L is stored as a diagonal matrix D and a sparse lower
    # triangular matrix P, such that
    #
    #     L(X)[:] = D**-1 * (I + P) * X[:]
    #     L^-1(X)[:] = D * (I - P) * X[:].

    # SS is q x q with SS[i,j] = si*sj.
    blas.scal(0.0, SS)
    blas.syr(s, SS)

    # For a p x q matrix X, P*X[:] is Y[:] where
    #
    #     Yij = si * sj * Xji  if i < j
    #         = 0              otherwise.
    #
    P.V = SS[Itril2]

    # For a p x q matrix X, D*X[:] is Y[:] where
    #
    #     Yij = Xij / sqrt( 1 - si^2 * sj^2 )  if i < j
    #         = Xii / sqrt( 1 + si^2 )         if i = j
    #         = Xij                            otherwise.
    #
    DV[Idiag] = sqrt(1.0 + SS[::q+1])
    DV[Itriu] = sqrt(1.0 - SS[Itril3]**2)
    D.V = DV**-1

    # 3. Scaled linear mappings

    # Ask := V2' * Ask * V1'
    blas.scal(0.0, As)
    base.axpy(A, As)
    # range (not the Python-2-only xrange) keeps this runnable on Python 3.
    for i in range(n):
        # tmp := V2' * As[i, :]
        blas.gemm(V2, As, tmp, transA='T', m=p, n=q, k=p,
                  ldB=p, offsetB=i*p*q)
        # As[:,i] := tmp * V1'
        blas.gemm(tmp, V1, As, transB='T', m=p, n=q, k=q,
                  ldC=p, offsetC=i*p*q)

    # As := D * (I - P) * As
    #     = L^-1 * As.
    blas.copy(As, As2)
    base.gemm(P, As, As2, alpha=-1.0, beta=1.0)
    base.gemm(D, As2, As)

    # Gs := Dl^-1 * G
    blas.scal(0.0, Gs)
    base.axpy(G, Gs)
    for k in range(n):
        blas.tbmv(W['di'], Gs, n=m, k=0, ldA=1, offsetx=k*m)

    # 4. Cholesky factorization of H = C + Gs' * Gs + 2 * As' * As.

    blas.syrk(As, H, trans='T', alpha=2.0)
    blas.syrk(Gs, H, trans='T', beta=1.0)
    base.axpy(C, H)
    lapack.potrf(H)

    def f(x, y, z):
        """
        Solve

                          C * ux + G' * uzl - 2*A'(uzs21) = bx
                                                   -uzs11 = bX1
                                                   -uzs22 = bX2
                                       G * ux - D^2 * uzl = bzl
            [ -uX1   -A(ux)' ]       [ uzs11 uzs21' ]
            [                ] - T * [              ] * T = bzs.
            [ -A(ux) -uX2    ]       [ uzs21 uzs22  ]

        On entry, x = (bx, bX1, bX2) and z = [ bzl; bzs[:] ].
        On exit, x = (ux, uX1, uX2) and z = [ D*uzl; (r'*uzs*r)[:] ].
        The argument y is not used.

        Define X = uzs21, Z = T * uzs * T:

                              C * ux + G' * uzl - 2*A'(X) = bx
                    [ 0  X' ]               [ bX1 0   ]
                T * [       ] * T - Z = T * [         ] * T
                    [ X  0  ]               [ 0   bX2 ]
                                       G * ux - D^2 * uzl = bzl
            [ -uX1   -A(ux)' ]   [ Z11 Z21' ]
            [                ] - [          ] = bzs
            [ -A(ux) -uX2    ]   [ Z21 Z22  ]

        Return x = (ux, uX1, uX2), z = [ D*uzl; (rti'*Z*rti)[:] ].

        We use the congruence transformation

            [ V1   0   ] [ T11  T21' ] [ V1'  0  ]   [ I  S' ]
            [          ] [           ] [         ] = [       ]
            [ 0    V2' ] [ T21  T22  ] [ 0    V2 ]   [ S  I  ]

        and the factorization

            X + S * X' * S = L( L'(X) )

        to write this as

                              C * ux + G' * uzl - 2*A'(X) = bx
            L'(V2^-1 * X * V1^-1) - L^-1(V2' * Z21 * V1') = bX
                                       G * ux - D^2 * uzl = bzl
            [ -uX1   -A(ux)' ]   [ Z11 Z21' ]
            [                ] - [          ] = bzs,
            [ -A(ux) -uX2    ]   [ Z21 Z22  ]

        or

            C * ux + Gs' * uuzl - 2*As'(XX) = bx
                                  XX - ZZ21 = bX
                             Gs * ux - uuzl = D^-1 * bzl
                            -As(ux) - ZZ21  = bbzs_21
                                 -uX1 - Z11 = bzs_11
                                 -uX2 - Z22 = bzs_22

        if we introduce scaled variables

            uuzl = D * uzl
              XX = L'(V2^-1 * X * V1^-1)
                 = L'(V2^-1 * uzs21 * V1^-1)
            ZZ21 = L^-1(V2' * Z21 * V1')

        and define

            bbzs_21 = L^-1(V2' * bzs_21 * V1')
                                       [ bX1  0   ]
                 bX = L^-1( V2' * (T * [          ] * T)_21 * V1').
                                       [ 0    bX2 ]

        Eliminating Z21 gives

            C * ux + Gs' * uuzl - 2*As'(XX) = bx
                             Gs * ux - uuzl = D^-1 * bzl
                               -As(ux) - XX = bbzs_21 - bX
                                 -uX1 - Z11 = bzs_11
                                 -uX2 - Z22 = bzs_22

        and eliminating uuzl and XX gives

                    H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bX - bbzs_21)
            Gs * ux - uuzl = D^-1 * bzl
              -As(ux) - XX = bbzs_21 - bX
                -uX1 - Z11 = bzs_11
                -uX2 - Z22 = bzs_22.

        In summary, we can use the following algorithm:

        1. bXX := bX - bbzs21
                                        [ bX1 0   ]
                = L^-1( V2' * ((T * [         ] * T)_21 - bzs_21) * V1')
                                        [ 0   bX2 ]

        2. Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        3. From ux, compute

               uuzl = Gs*ux - D^-1 * bzl and
               X = V2 * L^-T(-As(ux) + bXX) * V1.

        4. Return ux, uuzl,

               rti' * Z * rti = r' * [ -bX1, X'; X, -bX2 ] * r

           and uX1 = -Z11 - bzs_11,  uX2 = -Z22 - bzs_22.
        """

        # Save bzs_11, bzs_22, bzs_21.
        lapack.lacpy(z, bz11, uplo='L', m=q, n=q, ldA=p+q, offsetA=m)
        lapack.lacpy(z, bz21, m=p, n=q, ldA=p+q, offsetA=m+q)
        lapack.lacpy(z, bz22, uplo='L', m=p, n=p, ldA=p+q,
                     offsetA=m + (p+q+1)*q)

        # zl := D^-1 * zl
        #     = D^-1 * bzl
        blas.tbmv(W['di'], z, n=m, k=0, ldA=1)

        # zs := r' * [ bX1, 0; 0, bX2 ] * r.

        # zs := [ bX1, 0; 0, bX2 ]
        blas.scal(0.0, z, offset=m)
        lapack.lacpy(x[1], z, uplo='L', m=q, n=q, ldB=p+q, offsetB=m)
        lapack.lacpy(x[2], z, uplo='L', m=p, n=p, ldB=p+q,
                     offsetB=m + (p+q+1)*q)

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc=p+q+1, offset=m)

        # a := tril(zs)*r
        blas.copy(r, a)
        blas.trmm(z, a, side='L', m=p+q, n=p+q, ldA=p+q, ldB=p+q,
                  offsetA=m)

        # zs := a'*r + r'*a
        blas.syr2k(r, a, z, trans='T', n=p+q, k=p+q, ldB=p+q, ldC=p+q,
                   offsetC=m)

        # bz21 := L^-1( V2' * ((r * zs * r')_21 - bz21) * V1')
        #
        #                           [ bX1 0   ]
        #       = L^-1( V2' * ((T * [         ] * T)_21 - bz21) * V1').
        #                           [ 0   bX2 ]

        # a = [ r21 r22 ] * zs
        #   = [ r21 r22 ] * r' * [ bX1, 0; 0, bX2 ] * r
        #   = [ T21  T22 ] * [ bX1, 0; 0, bX2 ] * r
        #
        # zs is the symmetric block of z that starts at offset m (after
        # the m entries of zl), so the symmetric operand must be read
        # from offsetA = m, like every other access to zs in this routine.
        blas.symm(z, r, a, side='R', m=p, n=p+q, ldA=p+q, ldC=p+q,
                  offsetA=m, offsetB=q)

        # bz21 := -bz21 + a * [ r11, r12 ]'
        #       = -bz21 + (T * [ bX1, 0; 0, bX2 ] * T)_21
        blas.gemm(a, r, bz21, transB='T', m=p, n=q, k=p+q, beta=-1.0,
                  ldA=p+q, ldC=p)

        # bz21 := V2' * bz21 * V1'
        #       = V2' * (-bz21 + (T*[bX1, 0; 0, bX2]*T)_21) * V1'
        blas.gemm(V2, bz21, tmp, transA='T', m=p, n=q, k=p, ldB=p)
        blas.gemm(tmp, V1, bz21, transB='T', m=p, n=q, k=q, ldC=p)

        # bz21[:] := D * (I-P) * bz21[:]
        #       = L^-1 * bz21[:]
        #       = bXX[:]
        blas.copy(bz21, tmp)
        base.gemv(P, bz21, tmp, alpha=-1.0, beta=1.0)
        base.gemv(D, tmp, bz21)

        # Solve H * ux = bx + Gs' * D^-1 * bzl + 2*As'(bXX).

        # x[0] := x[0] + Gs'*zl + 2*As'(bz21)
        #       = bx + G' * D^-1 * bzl + 2 * As'(bXX)
        blas.gemv(Gs, z, x[0], trans='T', alpha=1.0, beta=1.0)
        blas.gemv(As, bz21, x[0], trans='T', alpha=2.0, beta=1.0)

        # x[0] := H \ x[0]
        #      = ux
        lapack.potrs(H, x[0])

        # uuzl = Gs*ux - D^-1 * bzl
        blas.gemv(Gs, x[0], z, alpha=1.0, beta=-1.0)

        # bz21 := V2 * L^-T(-As(ux) + bz21) * V1
        #       = X
        blas.gemv(As, x[0], bz21, alpha=-1.0, beta=1.0)
        blas.tbsv(DV, bz21, n=p*q, k=0, ldA=1)
        blas.copy(bz21, tmp)
        base.gemv(P, tmp, bz21, alpha=-1.0, beta=1.0, trans='T')
        blas.gemm(V2, bz21, tmp)
        blas.gemm(tmp, V1, bz21)

        # zs := -zs + r' * [ 0, X'; X, 0 ] * r
        #     = r' * [ -bX1, X'; X, -bX2 ] * r.

        # a := bz21 * [ r11, r12 ]
        #   = X * [ r11, r12 ]
        blas.gemm(bz21, r, a, m=p, n=p+q, k=q, ldA=p, ldC=p+q)

        # z := -z + [ r21, r22 ]' * a + a' * [ r21, r22 ]
        #    = rti' * uzs * rti
        blas.syr2k(r, a, z, trans='T', beta=-1.0, n=p+q, k=p,
                   offsetA=q, offsetC=m, ldB=p+q, ldC=p+q)

        # uX1 = -Z11 - bzs_11
        #     = -(r*zs*r')_11 - bzs_11
        # uX2 = -Z22 - bzs_22
        #     = -(r*zs*r')_22 - bzs_22

        blas.copy(bz11, x[1])
        blas.copy(bz22, x[2])

        # scale diagonal of zs by 1/2
        blas.scal(0.5, z, inc=p+q+1, offset=m)

        # a := r*tril(zs)
        blas.copy(r, a)
        blas.trmm(z, a, side='R', m=p+q, n=p+q, ldA=p+q, ldB=p+q,
                  offsetA=m)

        # x[1] := -x[1] - a[:q,:] * r[:q, :]' - r[:q,:] * a[:q,:]'
        #       = -bzs_11 - (r*zs*r')_11
        blas.syr2k(a, r, x[1], n=q, alpha=-1.0, beta=-1.0)

        # x[2] := -x[2] - a[q:,:] * r[q:, :]' - r[q:,:] * a[q:,:]'
        #       = -bzs_22 - (r*zs*r')_22
        blas.syr2k(a, r, x[2], n=p, alpha=-1.0, beta=-1.0,
                   offsetA=q, offsetB=q)

        # scale diagonal of zs by 2 (undo the halving done above so that
        # the returned z holds the unmodified result)
        blas.scal(2.0, z, inc=p+q+1, offset=m)

    return f
def f(x, y, z):
    """
    Solve one KKT system in place, given precomputed factorizations.

    1. Compute

           uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
                ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2)
                * ( bxk + Dk^-2 *bzk ) )

    2. For k = 1, ..., m:

           uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                 (-uy + bxk + Dk^-2 * bzk)

    3. Solve for uz

           d .* uz = ( ux - mat(bz) ) ./ d.

    Return ux, uy, d .* uz.

    On entry x, y, z hold bx, by, bz; they are overwritten with the
    solution (nothing is returned).  CPU time spent in the call is
    printed at the end.

    NOTE(review): d, dsq, D, S, H, X, ones, wN, wnm, N, n, m and cputime
    are not defined in this function; presumably they come from an
    enclosing scope in which the factorizations were computed -- confirm
    against the caller.
    """

    ###
    utime0, stime0 = cputime()
    ###

    # xk := Dk^2 * xk + zk
    #     = Dk^2 * bxk + bzk
    blas.tbmv(dsq, x, n=N * m, k=0, ldA=1)
    blas.axpy(z, x)

    # y := -y + sum_k ( I - Dk^2 * X * Hk^-1 * X' ) * xk
    #    = -y + x*ones - sum_k Dk^2 * X * Hk^-1 * X' * xk

    # y := -y + x*ones
    blas.gemv(x, ones, y, alpha=1.0, beta=-1.0)

    # wnm = X' * x  (wnm interpreted as an n x m matrix)
    blas.gemm(X, x, wnm, m=n, k=N, n=m, transA='T', ldB=N, ldC=n)

    # wnm[:,k] = Hk \ wnm[:,k] (for wnm as an n x m matrix)
    for k in range(m):
        lapack.potrs(H[k], wnm, offsetB=k * n)

    for k in range(m):

        # wN = X * wnm[:,k]
        blas.gemv(X, wnm, wN, offsetx=n * k)

        # wN = Dk^2 * wN
        # NOTE(review): dsq[:, k] builds a copy of column k; the later
        # loops address the same column in place with offsetA=k * N.
        blas.tbmv(dsq[:, k], wN, n=N, k=0, ldA=1)

        # y := y - wN
        blas.axpy(wN, y, -1.0)

    # y = D^-1 * (I + Y * S^-1 * Y') * D^-1 * y
    #
    # Y = [Y1 ... Ym ], Yk = D^-1 * Dk^2 * X * Lk^-T.

    # y := D^-1 * y
    blas.tbsv(D, y, n=N, k=0, ldA=1)

    # wnm =  Y' * y  (interpreted as an mn-vector)
    #     = [ L1^-1 * X' * D1^2 * D^-1 * y;
    #         L2^-1 * X' * D2^2 * D^-1 * y;
    #         ...
    #         Lm^-1 * X' * Dm^2 * D^-1 * y ]

    for k in range(m):

        # wN = D^-1 * Dk^2 * y
        blas.copy(y, wN)
        blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
        blas.tbsv(D, wN, n=N, k=0, ldA=1)

        # wnm[:,k] = X' * wN
        blas.gemv(X, wN, wnm, trans='T', offsety=k * n)

        # wnm[:,k] = Lk^-1 * wnm[:,k]
        blas.trsv(H[k], wnm, offsetx=k * n)

    # wnm := S^-1 * wnm  (an mn-vector)
    lapack.potrs(S, wnm)

    # y := y + Y * wnm
    #    = y + D^-1 * [ D1^2 * X * L1^-T ... Dm^2 * X * Lm^-T]
    #      * wnm

    for k in range(m):

        # wnm[:,k] = Lk^-T * wnm[:,k]
        blas.trsv(H[k], wnm, trans='T', offsetx=k * n)

        # wN = X * wnm[:,k]
        blas.gemv(X, wnm, wN, offsetx=k * n)

        # wN = D^-1 * Dk^2 * wN
        blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
        blas.tbsv(D, wN, n=N, k=0, ldA=1)

        # y += wN
        blas.axpy(wN, y)

    # y := D^-1 * y
    blas.tbsv(D, y, n=N, k=0, ldA=1)

    # For k = 1, ..., m:
    #
    # xk = (I - Dk^2 * X * Hk^-1 * X') * (-Dk^2 * y + xk)

    # x = x - [ D1^2 * y ... Dm^2 * y] (as an N x m matrix)
    for k in range(m):
        blas.copy(y, wN)
        blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
        blas.axpy(wN, x, -1.0, offsety=k * N)

    # wnm  = X' * x (as an n x m matrix)
    blas.gemm(X, x, wnm, transA='T', m=n, n=m, k=N, ldB=N, ldC=n)

    # wnm[:,k] = Hk^-1 * wnm[:,k]
    for k in range(m):
        lapack.potrs(H[k], wnm, offsetB=n * k)

    for k in range(m):

        # wN = X * wnm[:,k]
        blas.gemv(X, wnm, wN, offsetx=k * n)

        # wN = Dk^2 * wN
        blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)

        # x[:,k] := x[:,k] - wN
        blas.axpy(wN, x, -1.0, n=N, offsety=k * N)

    # z := ( x - z ) ./ d
    blas.axpy(x, z, -1.0)
    blas.scal(-1.0, z)
    blas.tbsv(d, z, n=N * m, k=0, ldA=1)

    ###
    utime, stime = cputime()
    print("Solve: utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
def trsm(L, B, alpha=1.0, trans='N', nrhs=None, offsetB=0, ldB=None):
    r"""
    Solves a triangular system of equations with multiple right-hand
    sides. Computes

    .. math::

        B &:= \alpha L^{-1} B  \text{ if trans is 'N'} \\
        B &:= \alpha L^{-T} B  \text{ if trans is 'T'}

    where :math:`L` is a :py:class:`cspmatrix` factor. The right-hand
    side :math:`B` is overwritten with the solution; nothing is returned.

    :param L:        :py:class:`cspmatrix` factor
    :param B:        matrix
    :param alpha:    float (default: 1.0)
    :param trans:    'N' or 'T' (default: 'N')
    :param nrhs:     number of right-hand sides (default: number of
                     columns in :math:`B`)
    :param offsetB:  integer (default: 0)
    :param ldB:      leading dimension of :math:`B` (default: number of
                     rows in :math:`B`)
    :raises AssertionError: if `L` is not a cspmatrix factor, `B` is not
                     a matrix, or `trans` is neither 'N' nor 'T'
    """
    assert isinstance(L, cspmatrix) and L.is_factor is True, \
        "L must be a cspmatrix factor"
    assert isinstance(B, matrix), "B must be a matrix"
    if ldB is None:
        ldB = B.size[0]
    if nrhs is None:
        nrhs = B.size[1]
    assert trans in ['N', 'T']

    n = L.symb.n
    snpost = L.symb.snpost
    snptr = L.symb.snptr
    snode = L.symb.snode
    chptr = L.symb.chptr
    chidx = L.symb.chidx

    relptr = L.symb.relptr
    relidx = L.symb.relidx
    blkptr = L.symb.blkptr
    blkval = L.blkval

    # No permutation stored: use the identity ordering.
    p = L.symb.p
    if p is None:
        p = range(n)

    stack = []

    # Compare by value: identity comparison of strings ("is") depends on
    # interning and is not a reliable equality test.
    if trans == 'N':

        # Forward sweep: supernodes are visited in post-order, so the
        # contributions of all children are on the stack when a node is
        # reached.
        for k in snpost:

            nn = snptr[k + 1] - snptr[k]       # |Nk|
            na = relptr[k + 1] - relptr[k]     # |Ak|
            nj = na + nn

            # extract block from rhs
            Uk = matrix(0.0, (nj, nrhs))
            for j in range(nrhs):
                for i, ir in enumerate(snode[snptr[k]:snptr[k + 1]]):
                    Uk[i, j] = alpha * B[offsetB + j * ldB + p[ir]]

            # add contributions from children
            for _ in range(chptr[k], chptr[k + 1]):
                Ui, i = stack.pop()
                r = relidx[relptr[i]:relptr[i + 1]]
                Uk[r, :] += Ui

            # if k is not a root node
            if na > 0:
                blas.gemm(blkval, Uk, Uk, alpha=-1.0, beta=1.0,
                          m=na, n=nrhs, k=nn,
                          offsetA=blkptr[k] + nn, ldA=nj, offsetC=nn)
                stack.append((Uk[nn:, :], k))

            # scale and copy block to rhs
            blas.trsm(blkval, Uk, m=nn, n=nrhs, offsetA=blkptr[k], ldA=nj)
            for j in range(nrhs):
                for i, ir in enumerate(snode[snptr[k]:snptr[k + 1]]):
                    B[offsetB + j * ldB + p[ir]] = Uk[i, j]

    else:  # trans == 'T'

        # Backward sweep: reverse post-order, so a node's parent has
        # pushed its contribution before the node is reached.
        for k in reversed(list(snpost)):

            nn = snptr[k + 1] - snptr[k]       # |Nk|
            na = relptr[k + 1] - relptr[k]     # |Ak|
            nj = na + nn

            # extract block from rhs and scale
            Uk = matrix(0.0, (nj, nrhs))
            for j in range(nrhs):
                for i, ir in enumerate(snode[snptr[k]:snptr[k + 1]]):
                    Uk[i, j] = alpha * B[offsetB + j * ldB + p[ir]]
            blas.trsm(blkval, Uk, transA='T', m=nn, n=nrhs,
                      offsetA=blkptr[k], ldA=nj)

            # if k is not a root node
            if na > 0:
                Uk[nn:, :] = stack.pop()
                blas.gemm(blkval, Uk, Uk, alpha=-1.0, beta=1.0,
                          m=nn, n=nrhs, k=na, transA='T',
                          offsetA=blkptr[k] + nn, ldA=nj, offsetB=nn)

            # stack contributions for children
            for ii in range(chptr[k], chptr[k + 1]):
                i = chidx[ii]
                stack.append(Uk[relidx[relptr[i]:relptr[i + 1]], :])

            # copy block to rhs
            for j in range(nrhs):
                for i, ir in enumerate(snode[snptr[k]:snptr[k + 1]]):
                    B[offsetB + j * ldB + p[ir]] = Uk[i, j]

    return
def __Y2K(L, U, inv=False):
    """
    Apply a supernodal multifrontal sweep, in place, to every item of U.

    Each item of U must expose a `blkval` array laid out like `L.blkval`;
    its supernodal blocks are overwritten. The flag `inv` selects the
    sign of the rank updates and whether the update matrices of the
    children are merged into the frontal matrix before (`inv` false) or
    after (`inv` true) those updates.

    :param L:    factor providing the symbolic structure (`L.symb`) and
                 the numerical blocks (`L.blkval`)
    :param U:    iterable of objects with a `blkval` attribute
    :param inv:  boolean (default: False)
    """
    symb = L.symb
    snpost = symb.snpost
    snptr = symb.snptr
    chptr = symb.chptr
    chidx = symb.chidx
    relptr = symb.relptr
    relidx = symb.relidx
    blkptr = symb.blkptr

    pending = []
    # Coefficient of every rank update below (the negated sign factor).
    coef = 1.0 if inv else -1.0

    def merge_children(front, node):
        # add update matrices from children to frontal matrix
        for ci in reversed(range(chptr[node], chptr[node + 1])):
            child_update = pending.pop()
            frontal_add_update(front, child_update, relidx, relptr,
                               chidx[ci])

    for Ut in U:
        for k in snpost:
            nn = snptr[k + 1] - snptr[k]     # |Nk|
            na = relptr[k + 1] - relptr[k]   # |Ak|
            nj = nn + na
            has_parent = na > 0

            # allocate the frontal matrix and copy Ut_{Jk,Nk} to its
            # leading columns
            F = matrix(0.0, (nj, nj))
            lapack.lacpy(Ut.blkval, F, offsetA=blkptr[k], ldA=nj,
                         m=nj, n=nn, uplo='L')

            if not inv:
                merge_children(F, k)

            if has_parent:
                # F_{Ak,Ak} := F_{Ak,Ak} - alpha*L_{Ak,Nk}*F_{Ak,Nk}'
                blas.gemm(L.blkval, F, F, beta=1.0, alpha=coef,
                          m=na, n=na, k=nn,
                          ldA=nj, ldB=nj, ldC=nj, transB='T',
                          offsetA=blkptr[k] + nn, offsetB=nn,
                          offsetC=nn * (nj + 1))
                # F_{Ak,Nk} := F_{Ak,Nk} - alpha*L_{Ak,Nk}*F_{Nk,Nk}
                blas.symm(F, L.blkval, F, side='R', beta=1.0, alpha=coef,
                          m=na, n=nn, ldA=nj, ldB=nj, ldC=nj,
                          offsetA=0, offsetB=blkptr[k] + nn, offsetC=nn)
                # F_{Ak,Ak} := F_{Ak,Ak} - alpha*F_{Ak,Nk}*L_{Ak,Nk}'
                blas.gemm(F, L.blkval, F, beta=1.0, alpha=coef,
                          m=na, n=na, k=nn,
                          ldA=nj, ldB=nj, ldC=nj, transB='T',
                          offsetA=nn, offsetB=blkptr[k] + nn,
                          offsetC=nn * (nj + 1))

            if inv:
                merge_children(F, k)

            if has_parent:
                # push the trailing Ak x Ak block as the update matrix
                # for the parent
                Uk = matrix(0.0, (na, na))
                lapack.lacpy(F, Uk, m=na, n=na, offsetA=nn * (nj + 1),
                             ldA=nj, uplo='L')
                pending.append(Uk)

            # copy the leading Nk columns of frontal matrix to blkval
            lapack.lacpy(F, Ut.blkval, uplo='L', offsetB=blkptr[k],
                         m=nj, n=nn, ldB=nj)

    return
def kkt(W):
    """
    KKT solver for

        X*X' * ux  + uy * 1_m' + mat(uz)  = bx
                                ux * 1_m  = by
                    ux - d.^2 .* mat(uz)  = mat(bz).

    ux and bx are N x m matrices.
    uy and by are N-vectors.
    uz and bz are N*m-vectors.  mat(uz) is the N x m matrix that
        satisfies mat(uz)[:] = uz.
    d = mat(W['d']) a positive N x m matrix.

    If we eliminate uz from the last equation using

        mat(uz) = (ux - mat(bz)) ./ d.^2

    we get two equations in ux, uy:

        X*X' * ux + ux ./ d.^2 + uy * 1_m' = bx + mat(bz) ./ d.^2
                                  ux * 1_m = by.

    From the 1st equation,

        uxk = (X*X' + Dk^-2)^-1 * (-uy + bxk + Dk^-2 * bzk)
            = Dk * (I + Xk*Xk')^-1 * Dk * (-uy + bxk + Dk^-2 * bzk)

    for k = 1, ..., m, where Dk = diag(d[:,k]), Xk = Dk * X,
    uxk is column k of ux, and bzk is column k of mat(bz).

    We use the matrix inversion lemma

        ( I + Xk * Xk' )^-1 = I - Xk * (I + Xk' * Xk)^-1 * Xk'
                            = I - Xk * Hk^-1 * Xk'
                            = I - Xk * Lk^-T * Lk^-1 * Xk'

    where Hk = I + Xk' * Xk = Lk * Lk' to write this as

        uxk = Dk * (I - Xk * Hk^-1 * Xk') * Dk *
              (-uy + bxk + Dk^-2 * bzk)
            = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
              (-uy + bxk + Dk^-2 * bzk).

    Substituting this in the second equation gives an equation for uy:

        sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2 ) * uy
            = -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
              ( bxk + Dk^-2 * bzk ),

    i.e., with D = (sum_k Dk^2)^1/2,  Yk = D^-1 * Dk^2 * X * Lk^-T,

        D * ( I - sum_k Yk * Yk' ) * D * uy
            = -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
              ( bxk + Dk^-2 *bzk ).

    Another application of the matrix inversion lemma gives

        uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
             ( -by + sum_k ( Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2 ) *
             ( bxk + Dk^-2 *bzk ) )

    with S = I - Y' * Y,  Y = [ Y1 ... Ym ].

    Summary:

    1. Compute

           uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
                ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2)
                * ( bxk + Dk^-2 *bzk ) )

    2. For k = 1, ..., m:

           uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                 (-uy + bxk + Dk^-2 * bzk)

    3. Solve for uz

           d .* uz = ( ux - mat(bz) ) ./ d.

    Return ux, uy, d .* uz.

    The factorizations of the Hk's and of S are computed here, once per
    scaling W, and reused by the returned function f(x, y, z), which
    overwrites x, y, z with the solution.  CPU timings of each phase are
    printed.

    NOTE(review): X, Xs, H, S, D, ones, wN, wnm, N, n, m and cputime are
    not defined in this function; presumably they are workspaces and
    problem data of an enclosing scope -- confirm against the caller.
    """

    ###
    utime0, stime0 = cputime()
    ###

    d = matrix(W['d'], (N, m))
    dsq = matrix(W['d']**2, (N, m))

    # Factor the matrices
    #
    #     H[k] = I + Xk' * Xk
    #          = I + X' * Dk^2 * X.
    #
    # Dk = diag(d[:,k]).

    for k in range(m):

        # H[k] = I
        blas.scal(0.0, H[k])
        H[k][::n + 1] = 1.0

        # Xs = Dk * X
        #    = diag(d[:,k]) * X
        blas.copy(X, Xs)
        for j in range(n):
            blas.tbmv(d, Xs, n=N, k=0, ldA=1, offsetA=k * N, offsetx=j * N)

        # H[k] := H[k] + Xs' * Xs
        #       = I + Xk' * Xk
        blas.syrk(Xs, H[k], trans='T', beta=1.0)

        # Factorization H[k] = Lk * Lk'
        lapack.potrf(H[k])

    ###
    utime, stime = cputime()
    print("Factor Hk's: utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    # diag(D) = ( sum_k d[:,k]**2 ) ** 1/2
    #         = ( sum_k Dk^2) ** 1/2.

    blas.gemv(dsq, ones, D)
    D[:] = sqrt(D)

    ###
    # utime, stime = cputime()
    # print("Compute D: utime = %.2f, stime = %.2f" \
    #    %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    # S = I - Y'* Y is an m x m block matrix.
    # The i,j block of Y' * Y is
    #
    #     Yi' * Yj = Li^-1 * X' * Di^2 * D^-1 * Dj^2 * X * Lj^-T.
    #
    # We compute only the lower triangular blocks in Y'*Y.

    blas.scal(0.0, S)
    for i in range(m):
        for j in range(i + 1):

            # Xs = Di * Dj * D^-1 * X
            blas.copy(X, Xs)
            blas.copy(d, wN, n=N, offsetx=i * N)
            blas.tbmv(d, wN, n=N, k=0, ldA=1, offsetA=j * N)
            blas.tbsv(D, wN, n=N, k=0, ldA=1)
            for k in range(n):
                blas.tbmv(wN, Xs, n=N, k=0, ldA=1, offsetx=k * N)

            # block i, j of S is Xs' * Xs (as nonsymmetric matrix so we
            # get the correct multiple after scaling with Li, Lj)
            blas.gemm(Xs, Xs, S, transA='T', ldC=m * n, offsetC=(j * n) * m * n + i * n)

    ###
    utime, stime = cputime()
    print("Form S: utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    for i in range(m):

        # multiply block row i of S on the left with Li^-1
        blas.trsm(H[i], S, m=n, n=(i + 1) * n, ldB=m * n, offsetB=i * n)

        # multiply block column i of S on the right with Li^-T
        blas.trsm(H[i], S, side='R', transA='T', m=(m - i) * n, n=n, ldB=m * n, offsetB=i * n * (m * n + 1))

    # S := I - S, i.e. I - Y' * Y on the lower triangular blocks
    blas.scal(-1.0, S)
    S[::(m * n + 1)] += 1.0

    ###
    utime, stime = cputime()
    print("Form S (2): utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    # S = L*L'
    lapack.potrf(S)

    ###
    utime, stime = cputime()
    print("Factor S: utime = %.2f, stime = %.2f" \
        %(utime-utime0, stime-stime0))
    utime0, stime0 = cputime()
    ###

    def f(x, y, z):
        """
        1. Compute

               uy = D^-1 * (I + Y * S^-1 * Y') * D^-1 *
                    ( -by + sum_k (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2)
                    * ( bxk + Dk^-2 *bzk ) )

        2. For k = 1, ..., m:

               uxk = (Dk^2 - Dk^2 * X * Hk^-1 * X' * Dk^2) *
                     (-uy + bxk + Dk^-2 * bzk)

        3. Solve for uz

               d .* uz = ( ux - mat(bz) ) ./ d.

        Return ux, uy, d .* uz.

        On entry x, y, z hold bx, by, bz; they are overwritten in place
        and nothing is returned.
        """

        ###
        utime0, stime0 = cputime()
        ###

        # xk := Dk^2 * xk + zk
        #     = Dk^2 * bxk + bzk
        blas.tbmv(dsq, x, n=N * m, k=0, ldA=1)
        blas.axpy(z, x)

        # y := -y + sum_k ( I - Dk^2 * X * Hk^-1 * X' ) * xk
        #    = -y + x*ones - sum_k Dk^2 * X * Hk^-1 * X' * xk

        # y := -y + x*ones
        blas.gemv(x, ones, y, alpha=1.0, beta=-1.0)

        # wnm = X' * x  (wnm interpreted as an n x m matrix)
        blas.gemm(X, x, wnm, m=n, k=N, n=m, transA='T', ldB=N, ldC=n)

        # wnm[:,k] = Hk \ wnm[:,k] (for wnm as an n x m matrix)
        for k in range(m):
            lapack.potrs(H[k], wnm, offsetB=k * n)

        for k in range(m):

            # wN = X * wnm[:,k]
            blas.gemv(X, wnm, wN, offsetx=n * k)

            # wN = Dk^2 * wN
            # NOTE(review): dsq[:, k] builds a copy of column k; the
            # later loops address the same column in place with
            # offsetA=k * N.
            blas.tbmv(dsq[:, k], wN, n=N, k=0, ldA=1)

            # y := y - wN
            blas.axpy(wN, y, -1.0)

        # y = D^-1 * (I + Y * S^-1 * Y') * D^-1 * y
        #
        # Y = [Y1 ... Ym ], Yk = D^-1 * Dk^2 * X * Lk^-T.

        # y := D^-1 * y
        blas.tbsv(D, y, n=N, k=0, ldA=1)

        # wnm =  Y' * y  (interpreted as an mn-vector)
        #     = [ L1^-1 * X' * D1^2 * D^-1 * y;
        #         L2^-1 * X' * D2^2 * D^-1 * y;
        #         ...
        #         Lm^-1 * X' * Dm^2 * D^-1 * y ]

        for k in range(m):

            # wN = D^-1 * Dk^2 * y
            blas.copy(y, wN)
            blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
            blas.tbsv(D, wN, n=N, k=0, ldA=1)

            # wnm[:,k] = X' * wN
            blas.gemv(X, wN, wnm, trans='T', offsety=k * n)

            # wnm[:,k] = Lk^-1 * wnm[:,k]
            blas.trsv(H[k], wnm, offsetx=k * n)

        # wnm := S^-1 * wnm  (an mn-vector)
        lapack.potrs(S, wnm)

        # y := y + Y * wnm
        #    = y + D^-1 * [ D1^2 * X * L1^-T ... Dm^2 * X * Lm^-T]
        #      * wnm

        for k in range(m):

            # wnm[:,k] = Lk^-T * wnm[:,k]
            blas.trsv(H[k], wnm, trans='T', offsetx=k * n)

            # wN = X * wnm[:,k]
            blas.gemv(X, wnm, wN, offsetx=k * n)

            # wN = D^-1 * Dk^2 * wN
            blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
            blas.tbsv(D, wN, n=N, k=0, ldA=1)

            # y += wN
            blas.axpy(wN, y)

        # y := D^-1 * y
        blas.tbsv(D, y, n=N, k=0, ldA=1)

        # For k = 1, ..., m:
        #
        # xk = (I - Dk^2 * X * Hk^-1 * X') * (-Dk^2 * y + xk)

        # x = x - [ D1^2 * y ... Dm^2 * y] (as an N x m matrix)
        for k in range(m):
            blas.copy(y, wN)
            blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)
            blas.axpy(wN, x, -1.0, offsety=k * N)

        # wnm  = X' * x (as an n x m matrix)
        blas.gemm(X, x, wnm, transA='T', m=n, n=m, k=N, ldB=N, ldC=n)

        # wnm[:,k] = Hk^-1 * wnm[:,k]
        for k in range(m):
            lapack.potrs(H[k], wnm, offsetB=n * k)

        for k in range(m):

            # wN = X * wnm[:,k]
            blas.gemv(X, wnm, wN, offsetx=k * n)

            # wN = Dk^2 * wN
            blas.tbmv(dsq, wN, n=N, k=0, ldA=1, offsetA=k * N)

            # x[:,k] := x[:,k] - wN
            blas.axpy(wN, x, -1.0, n=N, offsety=k * N)

        # z := ( x - z ) ./ d
        blas.axpy(x, z, -1.0)
        blas.scal(-1.0, z)
        blas.tbsv(d, z, n=N * m, k=0, ldA=1)

        ###
        utime, stime = cputime()
        print("Solve: utime = %.2f, stime = %.2f" \
            %(utime-utime0, stime-stime0))
        ###

    return f
def projected_inverse(L):
    """
    Supernodal multifrontal projected inverse.

    Given a Cholesky factor :math:`L`, the routine computes the projected
    inverse

    .. math::
         Y = P(L^{-T}L^{-1})

    in place: on exit the storage of :math:`L` holds :math:`Y` and
    :math:`L` is no longer flagged as a factor.

    :param L:  :py:class:`cspmatrix` (factor)
    """
    assert isinstance(L, cspmatrix) and L.is_factor is True, "L must be a cspmatrix factor"

    symb = L.symb
    snpost = symb.snpost
    snptr = symb.snptr
    chptr = symb.chptr
    chidx = symb.chidx
    relptr = symb.relptr
    relidx = symb.relidx
    blkptr = symb.blkptr
    blkval = L.blkval

    updates = []

    # Parents are processed before their children (reverse post-order),
    # so a node finds the update matrix pushed by its parent on the stack.
    for k in list(snpost)[::-1]:
        nn = snptr[k + 1] - snptr[k]      # |Nk|
        na = relptr[k + 1] - relptr[k]    # |Ak|
        nj = nn + na
        diag_off = blkptr[k]              # start of the {Nk,Nk} block
        sub_off = diag_off + nn           # start of the {Ak,Nk} block

        # invert factor of D_{Nk,Nk}
        lapack.trtri(blkval, offsetA=diag_off, ldA=nj, n=nn)

        # zero-out strict upper triangular part of {Nk,Nk} block
        # (just in case!)
        for col in range(1, nn):
            blas.scal(0.0, blkval, offset=diag_off + nj * col, n=col)

        # compute inv(D_{Nk,Nk}) (store in 1,1 block of the frontal matrix)
        front = matrix(0.0, (nj, nj))
        blas.syrk(blkval, front, trans='T', offsetA=diag_off, ldA=nj,
                  n=nn, k=nn)

        # if supernode k is not a root node:
        if na > 0:
            # copy "update matrix" to 2,2 block of the frontal matrix
            Vk = updates.pop()
            lapack.lacpy(Vk, front, ldB=nj, offsetB=nn * nj + nn,
                         m=na, n=na, uplo='L')

            # compute S_{Ak,Nk} = -Vk*L_{Ak,Nk}; store in 2,1 block
            blas.symm(Vk, blkval, front, m=na, n=nn, offsetB=sub_off,
                      ldB=nj, offsetC=nn, ldC=nj, alpha=-1.0)

            # compute S_nn = inv(D_{Nk,Nk}) - S_{Ak,Nk}'*L_{Ak,Nk};
            # store in 1,1 block
            blas.gemm(front, blkval, front, transA='T', m=nn, n=nn, k=na,
                      offsetA=nn, alpha=-1.0, beta=1.0,
                      offsetB=sub_off, ldB=nj)

        # extract update matrices if supernode k has any children
        for ci in range(chptr[k], chptr[k + 1]):
            updates.append(
                frontal_get_update(front, relidx, relptr, chidx[ci]))

        # copy S_{Jk,Nk} (i.e., 1,1 and 2,1 blocks of the frontal matrix)
        # to blkval
        lapack.lacpy(front, blkval, m=nj, n=nn, offsetB=diag_off, ldB=nj,
                     uplo='L')

    L._is_factor = False

    return