def mk_rand(V, cone='posdef', seed=0):
    """
    Generates a random matrix U with the sparsity pattern of V.

    - U is positive definite if cone is 'posdef'.
    - U is completable if cone is 'completable'.

    Arguments:
        V     sparse matrix defining the sparsity pattern.
        cone  'posdef' (default) or 'completable'.
        seed  seed value for the cvxopt random generator.

    Returns the random sparse matrix U.
    """
    if not (cone == 'posdef' or cone == 'completable'):
        # Call form of raise; the old 'raise ValueError, msg' statement is a
        # SyntaxError on Python 3 and inconsistent with the rest of the file.
        raise ValueError("cone must be 'posdef' (default) or 'completable' ")

    from cvxopt import amd
    setseed(seed)
    n = V.size[0]

    # U := projection onto the pattern of V of a random PSD matrix,
    # built as a sum of n rank-1 terms u*u'/n.
    U = +V
    U.V *= 0.0
    for i in range(n):
        u = normal(n, 1) / sqrt(n)
        base.syrk(u, U, beta=1.0, partial=True)

    # Test whether U is in the cone; if not, add a (growing) multiple of
    # the identity and retry.
    t = 0.1
    Ut = +U
    p = amd.order(Ut)
    symb = chompack.symbolic(U, p)
    while True:
        Uc = chompack.cspmatrix(symb) + Ut
        try:
            if cone == 'posdef':
                # positive definite?
                Y = Uc.copy()
                chompack.cholesky(Y)
            elif cone == 'completable':
                # completable?
                Y = Uc.copy()
                chompack.completion(Y)
            # success: Ut is in the cone
            U = +Ut
            break
        except Exception:
            # cholesky()/completion() failed: increase the diagonal shift.
            # (Was a bare 'except:', which would also swallow
            # KeyboardInterrupt/SystemExit.)
            Ut = U + spmatrix(t, range(n), range(n), (n, n))
            t *= 2.0
    return U
def mk_rand(V, cone='posdef', seed=0):
    """
    Generates a random matrix U with the sparsity pattern of V.

    - U is positive definite if cone is 'posdef'.
    - U is completable if cone is 'completable'.

    Arguments:
        V     sparse matrix defining the sparsity pattern.
        cone  'posdef' (default) or 'completable'.
        seed  seed value for the cvxopt random generator.

    Returns the random sparse matrix U.
    """
    if not (cone == 'posdef' or cone == 'completable'):
        raise ValueError("cone must be 'posdef' (default) or 'completable' ")

    from cvxopt import amd
    setseed(seed)
    n = V.size[0]

    # U := projection onto the pattern of V of a random PSD matrix,
    # built as a sum of n rank-1 terms u*u'/n.
    U = +V
    U.V *= 0.0
    for i in range(n):
        u = normal(n, 1) / sqrt(n)
        base.syrk(u, U, beta=1.0, partial=True)

    # Test whether U is in the cone; if not, add a (growing) multiple of
    # the identity and retry.
    t = 0.1
    Ut = +U
    p = amd.order(Ut)
    symb = chompack.symbolic(U, p)
    while True:
        Uc = chompack.cspmatrix(symb) + Ut
        try:
            if cone == 'posdef':
                # positive definite?
                Y = Uc.copy()
                chompack.cholesky(Y)
            elif cone == 'completable':
                # completable?
                Y = Uc.copy()
                chompack.completion(Y)
            # success: Ut is in the cone
            U = +Ut
            break
        except Exception:
            # cholesky()/completion() failed: increase the diagonal shift.
            # (Was a bare 'except:', which would also swallow
            # KeyboardInterrupt/SystemExit.)
            Ut = U + spmatrix(t, range(n), range(n), (n, n))
            t *= 2.0
    return U
def softmargin_appr(X, d, gamma, width, kernel='linear', sigma=1.0, degree=1, theta=1.0, Q=None):
    """
    Solves the approximate 'soft-margin' SVM problem

        maximize    -(1/2)*z'*Qc*z + d'*z
        subject to  0 <= diag(d)*z <= gamma*ones
                    sum(z) = 0

    (with variables z), and its dual problem

        minimize    (1/2)*y'*Qc^{-1}*y + gamma*sum(v)
        subject to  diag(d)*(y + b*ones) + v >= 1
                    v >= 0

    (with variables y, v, b).

    Qc is the maximum determinant completion of the projection of Q on a
    band with bandwidth 2*w+1.  Q_ij = K(xi, xj) where K is a kernel
    function and xi is the ith row of X (xi' = X[i,:]).

    Input arguments.

        X is an N x n matrix.

        d is an N-vector with elements -1 or 1; d[i] is the label of
        row X[i,:].

        gamma is a positive parameter.

        kernel is a string with values 'linear', 'rbf', 'poly', or 'tanh'.
            'linear': k(u,v) = u'*v/sigma.
            'rbf':    k(u,v) = exp(-||u - v||^2 / (2*sigma)).
            'poly':   k(u,v) = (u'*v/sigma)**degree.
            'tanh':   k(u,v) = tanh(u'*v/sigma - theta).

        sigma and theta are positive numbers.

        degree is a positive integer.

        width is a positive integer (half-bandwidth of the band pattern).

        Q is an optional precomputed sparse (banded) kernel matrix.

    Output.

        Returns a dictionary with the keys:

        'classifier'
            a Python function object that takes an M x n matrix with test
            vectors as rows and returns a vector with labels.

        'completion classifier'
            a Python function object that takes an M x n matrix with test
            vectors as rows and returns a vector with labels.

        'z'
            a sparse m-vector.

        'cputime'
            a tuple (Ttot, Tqp, Tker) where Ttot is the total CPU time,
            Tqp is the CPU time spent solving the QP, and Tker is the CPU
            time spent computing the kernel matrix.

        'iterations'
            the number of interior-point iterations.

        'misclassified'
            a tuple (L1, L2) where L1 is a list of indices of
            misclassified training vectors from class 1, and L2 is a list
            of indices of misclassified training vectors from class 2.
    """
    # NOTE(review): verbose and Tsv are read as module-level settings --
    # confirm they are defined at module scope.
    Tstart = cputime()
    if verbose:
        solvers.options['show_progress'] = True
    else:
        solvers.options['show_progress'] = False

    N, n = X.size
    if Q is None:
        # Build the banded partial kernel matrix and the vector of
        # scaled squared norms a_i = ||xi||^2 / sigma.
        Q, a = kernel_matrix(X, kernel, sigma=sigma, degree=degree, theta=theta, V='band', width=width)
    else:
        if not (Q.size[0] == N and Q.size[1] == N):
            raise ValueError("invalid kernel matrix dimensions")
        elif not type(Q) is cvxopt.base.spmatrix:
            raise ValueError("invalid kernel matrix type")
        elif verbose:
            print("using precomputed kernel matrix ..")
        if kernel == 'rbf':
            # NOTE(review): 'V' is undefined in this function, so this
            # branch raises NameError when reached; presumably the syrk
            # result should be accumulated in Ad (and scaled by 1/sigma
            # to match kernel_matrix) -- confirm and fix.
            Ad = spmatrix(0.0, range(N), range(N))
            base.syrk(X, V, partial=True)
            a = Ad.V
            del Ad
    Tkernel = cputime(Tstart)

    # solve qp
    Tqp = cputime()
    y, b, v, z, optval, Lc, iters = softmargin_completion(Q, matrix(d, tc='d'), gamma)
    Tqp = cputime(Tqp)
    if verbose:
        # cputime() presumably returns a (utime, stime) pair -- the
        # two-field format below relies on that.
        print("utime = %f, stime = %f." % Tqp)

    # extract nonzero support vectors (relative threshold Tsv)
    nrmz = max(abs(z))
    sv = [k for k in range(N) if abs(z[k]) > Tsv * nrmz]
    zs = spmatrix(z[sv], sv, [0 for i in range(len(sv))], (len(d), 1))
    if verbose:
        print("%d support vectors." % len(sv))
    # Restrict the data to the support vectors.
    Xr, zr, Nr = X[sv, :], z[sv], len(sv)

    # find misclassified training vectors (slack v[i] > 1 means the
    # margin constraint is violated on the wrong side)
    err1 = [i for i in range(Q.size[0]) if (v[i] > 1 and d[i] == 1)]
    err2 = [i for i in range(Q.size[0]) if (v[i] > 1 and d[i] == -1)]
    if verbose:
        e1, n1 = len(err1), list(d).count(1)
        e2, n2 = len(err2), list(d).count(-1)
        print("class 1: %i/%i = %.1f%% misclassified." % (e1, n1, 100. * e1 / n1))
        print("class 2: %i/%i = %.1f%% misclassified." % (e2, n2, 100. * e2 / n2))
        del e1, e2, n1, n2

    # create classifier function object
    # CLASSIFIER 1 (standard kernel classifier)
    if kernel == 'linear':
        # w = X'*z / sigma
        w = matrix(0.0, (n, 1))
        blas.gemv(Xr, zr, w, trans='T', alpha=1.0 / sigma)

        def classifier(Y, soft=False):
            M = Y.size[0]
            x = matrix(b, (M, 1))
            blas.gemv(Y, w, x, beta=1.0)
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'rbf':
        def classifier(Y, soft=False):
            M = Y.size[0]
            # K = Y*X' / sigma
            K = matrix(0.0, (M, Nr))
            blas.gemm(Y, Xr, K, transB='T', alpha=1.0 / sigma)
            # c[i] = ||Yi||^2 / sigma
            ones = matrix(1.0, (max([M, Nr, n]), 1))
            c = Y**2 * ones[:n]
            blas.scal(1.0 / sigma, c)
            # Kij := Kij - 0.5 * (ci + aj)
            #      = || yi - xj ||^2 / (2*sigma)
            blas.ger(c, ones, K, alpha=-0.5)
            blas.ger(ones, a[sv], K, alpha=-0.5)
            x = exp(K) * zr + b
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'tanh':
        def classifier(Y, soft=False):
            M = Y.size[0]
            # K = Y*X' / sigma - theta
            K = matrix(theta, (M, Nr))
            blas.gemm(Y, Xr, K, transB='T', alpha=1.0 / sigma, beta=-1.0)
            # tanh evaluated via exponentials: tanh(t) = (e^t - e^-t)/(e^t + e^-t)
            K = exp(K)
            x = div(K - K**-1, K + K**-1) * zr + b
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'poly':
        def classifier(Y, soft=False):
            M = Y.size[0]
            # K = Y*X' / sigma
            K = matrix(0.0, (M, Nr))
            blas.gemm(Y, Xr, K, transB='T', alpha=1.0 / sigma)
            x = K**degree * zr + b
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    else:
        # NOTE(review): an unknown kernel leaves 'classifier' unbound and
        # the final return statement will raise NameError.
        pass

    # CLASSIFIER 2 (completion kernel classifier)
    # TODO: generalize to arbitrary sparsity pattern
    # L11 is the Cholesky factor of the leading width x width block of Q.
    L11 = matrix(Q[:width, :width])
    lapack.potrf(L11)
    if kernel == 'linear':
        def classifier2(Y, soft=False):
            M = Y.size[0]
            W = matrix(0., (width, M))
            blas.gemm(X, Y, W, transB='T', alpha=1.0 / sigma, m=width)
            lapack.potrs(L11, W)
            W = matrix([W, matrix(0., (N - width, M))])
            chompack.trsm(Lc, W, trans='N')
            chompack.trsm(Lc, W, trans='T')
            x = matrix(b, (M, 1))
            blas.gemv(W, z, x, trans='T', beta=1.0)
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'poly':
        def classifier2(Y, soft=False):
            # Calling with Y=None returns the sparse support-vector
            # coefficients instead of classifying.
            if Y is None:
                return zs
            M = Y.size[0]
            W = matrix(0., (width, M))
            blas.gemm(X, Y, W, transB='T', alpha=1.0 / sigma, m=width)
            W = W**degree
            lapack.potrs(L11, W)
            W = matrix([W, matrix(0., (N - width, M))])
            chompack.trsm(Lc, W, trans='N')
            chompack.trsm(Lc, W, trans='T')
            x = matrix(b, (M, 1))
            blas.gemv(W, z, x, trans='T', beta=1.0)
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'rbf':
        def classifier2(Y, soft=False):
            M = Y.size[0]
            # K = Y*X' / sigma
            K = matrix(0.0, (width, M))
            blas.gemm(X, Y, K, transB='T', alpha=1.0 / sigma, m=width)
            # c[i] = ||Yi||^2 / sigma
            ones = matrix(1.0, (max(width, n, M), 1))
            c = Y**2 * ones[:n]
            blas.scal(1.0 / sigma, c)
            # Kij := Kij - 0.5 * (ci + aj)
            #      = || yi - xj ||^2 / (2*sigma)
            blas.ger(ones[:width], c, K, alpha=-0.5)
            blas.ger(a[:width], ones[:M], K, alpha=-0.5)
            # Kij = exp(Kij)
            K = exp(K)
            # complete K
            lapack.potrs(L11, K)
            K = matrix([K, matrix(0., (N - width, M))], (N, M))
            chompack.trsm(Lc, K, trans='N')
            chompack.trsm(Lc, K, trans='T')
            x = matrix(b, (M, 1))
            blas.gemv(K, z, x, trans='T', beta=1.0)
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'tanh':
        def classifier2(Y, soft=False):
            M = Y.size[0]
            # K = Y*X' / sigma
            K = matrix(theta, (width, M))
            blas.gemm(X, Y, K, transB='T', alpha=1.0 / sigma, beta=-1.0, m=width)
            K = exp(K)
            K = div(K - K**-1, K + K**-1)
            # complete K
            lapack.potrs(L11, K)
            K = matrix([K, matrix(0., (N - width, M))], (N, M))
            chompack.trsm(Lc, K, trans='N')
            chompack.trsm(Lc, K, trans='T')
            x = matrix(b, (M, 1))
            blas.gemv(K, z, x, trans='T', beta=1.0)
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])

    Ttotal = cputime(Tstart)
    return {
        'classifier': classifier,
        'completion classifier': classifier2,
        'cputime': (sum(Ttotal), sum(Tqp), sum(Tkernel)),
        'iterations': iters,
        'z': zs,
        'misclassified': (err1, err2)
    }
def kernel_matrix(X, kernel, sigma=1.0, theta=1.0, degree=1, V=None, width=None):
    """
    Computes the kernel matrix or a partial kernel matrix.

    Input arguments.

        X is an N x n matrix.

        kernel is a string with values 'linear', 'rbf', 'poly', or 'tanh'.
            'linear': k(u,v) = u'*v/sigma.
            'rbf':    k(u,v) = exp(-||u - v||^2 / (2*sigma)).
            'poly':   k(u,v) = (u'*v/sigma)**degree.
            'tanh':   k(u,v) = tanh(u'*v/sigma - theta).

        sigma and theta are positive numbers.

        degree is a positive integer.

        V is an N x N sparse matrix (default is None).

        width is a positive integer (default is None).

    Output.

        Q, an N x N matrix or sparse matrix.  If V is a sparse matrix, a
        partial kernel matrix with the sparsity pattern V is returned.
        If width is specified and V = 'band', a partial kernel matrix
        with band sparsity is returned (width is the half-bandwidth).

        a, an N x 1 matrix with the products <xi,xi>/sigma.
    """
    N, n = X.size

    #### dense (full) kernel matrix
    if V is None:
        if verbose:
            print("building kernel matrix ..")
        # Qij = xi'*xj / sigma
        Q = matrix(0.0, (N, N))
        blas.syrk(X, Q, alpha=1.0 / sigma)
        # ai = ||xi||**2 / sigma  (diagonal of Q)
        a = Q[::N + 1]
        if kernel == 'linear':
            pass
        elif kernel == 'rbf':
            # Qij := Qij - 0.5 * ( ai + aj )
            #      = -||xi - xj||^2 / (2*sigma)
            ones = matrix(1.0, (N, 1))
            blas.syr2(a, ones, Q, alpha=-0.5)
            Q = exp(Q)
        elif kernel == 'tanh':
            # tanh via exponentials: tanh(t) = (e^t - e^-t)/(e^t + e^-t)
            Q = exp(Q - theta)
            Q = div(Q - Q**-1, Q + Q**-1)
        elif kernel == 'poly':
            Q = Q**degree
        else:
            raise ValueError('invalid kernel type')

    #### general sparse partial kernel matrix
    elif type(V) is cvxopt.base.spmatrix:
        if verbose:
            print("building projected kernel matrix ...")
        # Only the entries in the pattern of V are computed.
        Q = +V
        base.syrk(X, Q, partial=True, alpha=1.0 / sigma)
        # ai = ||xi||**2 / sigma
        a = matrix(Q[::N + 1], (N, 1))
        if kernel == 'linear':
            pass
        elif kernel == 'rbf':
            ones = matrix(1.0, (N, 1))
            # Qij := Qij - 0.5 * ( ai + aj )
            #      = -||xi - xj||^2 / (2*sigma)
            # The rank-2 update is applied on the chordal (embedded)
            # pattern and mapped back in the original ordering.
            p = chompack.maxcardsearch(V)
            symb = chompack.symbolic(Q, p)
            Qc = chompack.cspmatrix(symb) + Q
            chompack.syr2(Qc, a, ones, alpha=-0.5)
            Q = Qc.spmatrix(reordered=False)
            Q.V = exp(Q.V)
        elif kernel == 'tanh':
            # Apply tanh elementwise to the stored nonzeros only.
            v = +Q.V
            v = exp(v - theta)
            v = div(v - v**-1, v + v**-1)
            Q.V = v
        elif kernel == 'poly':
            Q.V = Q.V**degree
        else:
            raise ValueError('invalid kernel type')

    #### banded partial kernel matrix
    elif V == 'band' and width is not None:
        # Lower triangular part of band matrix with bandwidth 2*w+1.
        if verbose:
            print("building projected kernel matrix ...")
        # I, J: row/column indices of the lower-triangular band entries.
        I = [i for k in range(N) for i in range(k, min(width + k + 1, N))]
        J = [k for k in range(N) for i in range(min(width + 1, N - k))]
        # V is reused here as the vector of band entries (column-major).
        V = matrix(0.0, (len(I), 1))
        oy = 0
        for k in range(N):
            # V[:,k] = Xtrain[k:k+w, :] * Xtrain[k,:].T
            m = min(width + 1, N - k)
            blas.gemv(X, X, V, m=m, ldA=N, incx=N, offsetA=k, offsetx=k, offsety=oy)
            oy += m
        blas.scal(1.0 / sigma, V)
        # ai = ||xi||**2 / sigma  (diagonal entries of the band)
        a = matrix(V[[i for i in range(len(I)) if I[i] == J[i]]], (N, 1))
        if kernel == 'linear':
            Q = spmatrix(V, I, J, (N, N))
        elif kernel == 'rbf':
            Q = spmatrix(V, I, J, (N, N))
            ones = matrix(1.0, (N, 1))
            # Qij := Qij - 0.5 * ( ai + aj )
            #      = -||xi - xj||^2 / (2*sigma)
            symb = chompack.symbolic(Q)
            Qc = chompack.cspmatrix(symb) + Q
            chompack.syr2(Qc, a, ones, alpha=-0.5)
            Q = Qc.spmatrix(reordered=False)
            Q.V = exp(Q.V)
        elif kernel == 'tanh':
            V = exp(V - theta)
            V = div(V - V**-1, V + V**-1)
            Q = spmatrix(V, I, J, (N, N))
        elif kernel == 'poly':
            Q = spmatrix(V**degree, I, J, (N, N))
        else:
            raise ValueError('invalid kernel type')

    else:
        raise TypeError('invalid type V')

    return Q, a
def softmargin(X, d, gamma, kernel='linear', sigma=1.0, degree=1, theta=1.0, Q=None):
    """
    Solves the 'soft-margin' SVM problem

        maximize    -(1/2)*z'*Q*z + d'*z
        subject to  0 <= diag(d)*z <= gamma*ones
                    sum(z) = 0

    (with variables z), and its dual problem

        minimize    (1/2)*y'*Q^{-1}*y + gamma*sum(v)
        subject to  diag(d)*(y + b*ones) + v >= 1
                    v >= 0

    (with variables y, v, b).

    Q is given by Q_ij = K(xi, xj) where K is a kernel function and xi is
    the ith row of X (xi' = X[i,:]).  If Q is singular, we replace Q^{-1}
    in the dual with its pseudo-inverse and add a constraint
    y in Range(Q).  We can also make a change of variables y = Q*u to
    obtain

        minimize    (1/2)*u'*Q*u + gamma*sum(v)
        subject to  diag(d)*(Q*u + b*ones) + v >= 1
                    v >= 0.

    For the linear kernel (Q = X*X'), a change of variables w = X'*u
    allows us to write this in the more common form

        minimize    (1/2)*w'*w + gamma*sum(v)
        subject to  diag(d)*(X*w + b*ones) + v >= 1
                    v >= 0.

    Input arguments.

        X is an N x n matrix.

        d is an N-vector with elements -1 or 1; d[i] is the label of
        row X[i,:].

        gamma is a positive parameter.

        kernel is a string with values 'linear', 'rbf', 'poly', or 'tanh'.
            'linear': k(u,v) = u'*v/sigma.
            'rbf':    k(u,v) = exp(-||u - v||^2 / (2*sigma)).
            'poly':   k(u,v) = (u'*v/sigma)**degree.
            'tanh':   k(u,v) = tanh(u'*v/sigma - theta).

        sigma and theta are positive numbers.

        degree is a positive integer.

        Q is an optional precomputed dense kernel matrix.

    Output.

        Returns a dictionary with the keys:

        'classifier'
            a Python function object that takes an M x n matrix with test
            vectors as rows and returns a vector with labels.

        'z'
            a sparse m-vector.

        'cputime'
            a tuple (Ttot, Tqp, Tker) where Ttot is the total CPU time,
            Tqp is the CPU time spent solving the QP, and Tker is the CPU
            time spent computing the kernel matrix.

        'iterations'
            the number of interior-point iterations.

        'misclassified'
            a tuple (L1, L2) where L1 is a list of indices of
            misclassified training vectors from class 1, and L2 is a list
            of indices of misclassified training vectors from class 2.
    """
    # NOTE: verbose, weights and Tsv are module-level settings -- they
    # must be defined at module scope.
    Tstart = cputime()
    if verbose:
        solvers.options['show_progress'] = True
    else:
        solvers.options['show_progress'] = False

    N, n = X.size
    if Q is None:
        Q, a = kernel_matrix(X, kernel, sigma=sigma, degree=degree, theta=theta)
    else:
        if not (Q.size[0] == N and Q.size[1] == N):
            raise ValueError("invalid kernel matrix dimensions")
        elif not type(Q) is cvxopt.base.matrix:
            raise ValueError("invalid kernel matrix type")
        elif verbose:
            print("using precomputed kernel matrix ..")
        if kernel == 'rbf':
            # Recover a_i = ||xi||**2 / sigma from X.  (Fixed: the
            # original called base.syrk(X, V, partial=True) where V is
            # undefined in this function; the diagonal must be
            # accumulated in Ad, scaled by 1/sigma to match
            # kernel_matrix().)
            Ad = spmatrix(0.0, range(N), range(N))
            base.syrk(X, Ad, partial=True, alpha=1.0 / sigma)
            a = Ad.V
            del Ad
    Tkernel = cputime(Tstart)

    # build qp
    Tqp = cputime()
    q = matrix(-d, tc='d')

    # Inequality constraints  0 <= diag(d)*z <= gamma*ones.
    G = spmatrix([], [], [], size=(2 * N, N))
    G[::2 * N + 1], G[N::2 * N + 1] = d, -d
    h = matrix(0.0, (2 * N, 1))
    # (Fixed: comparing strings with 'is' relies on CPython interning;
    # use '==' for value comparison.)
    if weights == 'proportional':
        # Per-class bound proportional to the inverse class frequency.
        dlist = list(d)
        C1 = 0.5 * N * gamma / dlist.count(1)
        C2 = 0.5 * N * gamma / dlist.count(-1)
        gvec = matrix([C1 if w == 1 else C2 for w in dlist], (N, 1))
        del dlist
        h[:N] = gvec
    elif weights == 'equal':
        h[:N] = gamma
    else:
        raise ValueError("invalid weight type")

    # solve qp
    sol = solvers.qp(Q, q, G, h, matrix(1.0, (1, N)), matrix(0.0))
    Tqp = cputime(Tqp)
    if verbose:
        print("utime = %f, stime = %f." % Tqp)

    # extract solution
    z, b, v = sol['x'], sol['y'][0], sol['z'][:N]

    # extract nonzero support vectors (relative threshold Tsv)
    nrmz = max(abs(z))
    sv = [k for k in range(N) if abs(z[k]) > Tsv * nrmz]
    N, X, z = len(sv), X[sv, :], z[sv]
    zs = spmatrix(z, sv, [0 for i in range(N)], (len(d), 1))
    if verbose:
        print("%d support vectors." % N)

    # find misclassified training vectors (slack v[i] > 1 means the
    # point lies on the wrong side of the margin)
    err1 = [i for i in range(Q.size[0]) if (v[i] > 1 and d[i] == 1)]
    err2 = [i for i in range(Q.size[0]) if (v[i] > 1 and d[i] == -1)]
    if verbose:
        e1, n1 = len(err1), list(d).count(1)
        e2, n2 = len(err2), list(d).count(-1)
        print("class 1: %i/%i = %.1f%% misclassified." % (e1, n1, 100. * e1 / n1))
        print("class 2: %i/%i = %.1f%% misclassified." % (e2, n2, 100. * e2 / n2))
        del e1, e2, n1, n2

    # create classifier function object
    if kernel == 'linear':
        # w = X'*z / sigma
        w = matrix(0.0, (n, 1))
        blas.gemv(X, z, w, trans='T', alpha=1.0 / sigma)

        def classifier(Y, soft=False):
            M = Y.size[0]
            x = matrix(b, (M, 1))
            blas.gemv(Y, w, x, beta=1.0)
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'rbf':
        def classifier(Y, soft=False):
            M = Y.size[0]
            # K = Y*X' / sigma
            K = matrix(0.0, (M, N))
            blas.gemm(Y, X, K, transB='T', alpha=1.0 / sigma)
            # c[i] = ||Yi||^2 / sigma
            ones = matrix(1.0, (max([M, N, n]), 1))
            c = Y**2 * ones[:n]
            blas.scal(1.0 / sigma, c)
            # Kij := Kij - 0.5 * (ci + aj)
            #      = || yi - xj ||^2 / (2*sigma)
            blas.ger(c, ones, K, alpha=-0.5)
            blas.ger(ones, a[sv], K, alpha=-0.5)
            x = exp(K) * z + b
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'tanh':
        def classifier(Y, soft=False):
            M = Y.size[0]
            # K = Y*X' / sigma - theta
            K = matrix(theta, (M, N))
            blas.gemm(Y, X, K, transB='T', alpha=1.0 / sigma, beta=-1.0)
            # tanh via exponentials: tanh(t) = (e^t - e^-t)/(e^t + e^-t)
            K = exp(K)
            x = div(K - K**-1, K + K**-1) * z + b
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    elif kernel == 'poly':
        def classifier(Y, soft=False):
            M = Y.size[0]
            # K = Y*X' / sigma
            K = matrix(0.0, (M, N))
            blas.gemm(Y, X, K, transB='T', alpha=1.0 / sigma)
            x = K**degree * z + b
            if soft:
                return x
            else:
                return matrix([2 * (xk > 0.0) - 1 for xk in x])
    else:
        # (Fixed: without this branch an unknown kernel with a
        # precomputed Q left 'classifier' unbound and the return below
        # raised NameError.)
        raise ValueError('invalid kernel type')

    Ttotal = cputime(Tstart)
    return {
        'classifier': classifier,
        'cputime': (sum(Ttotal), sum(Tqp), sum(Tkernel)),
        'iterations': sol['iterations'],
        'z': zs,
        'misclassified': (err1, err2)
    }
def F(W):
    """
    Custom solver for the KKT system

        [ It  0   0  Xt'  0    At1' ... Atk' ][ dwt  ]   [ rwt ]
        [ 0   0   0  -d'  0    0    ... 0    ][ db   ]   [ rb  ]
        [ 0   0   0  -I   -I   0    ... 0    ][ dv   ]   [ rv  ]
        [ Xt -d  -I  -Wl1^-2                 ][ dzl1 ]   [ rl1 ]
        [ 0   0  -I       -Wl2^-2            ][ dzl2 ] = [ rl2 ]
        [ At1 0   0            -W1^-2        ][ dz1  ]   [ r1  ]
        [ |   |   |                  .       ][ |    ]   [ |   ]
        [ Atk 0   0                  -Wk^-2  ][ dzk  ]   [ rk  ]

    where

        It = [ I 0 ]   Xt = [ -D*X  E ]   Ati = [ 0   -e_i' ]
             [ 0 0 ]                            [ -Pi  0    ]

        dwt = [ dw ]   rwt = [ rw ]
              [ dt ]         [ rt ].

    W is the cvxopt scaling dictionary.  Returns a function f(x, y, z)
    that solves the system in place for the current scaling.

    NOTE(review): this closure reads m, n, k, d, X, E, P, Pt and qscal
    from an enclosing scope not visible here -- confirm their
    definitions.
    """
    # scalings and 'intermediate' vectors
    # db = inv(Wl1)^2 + inv(Wl2)^2
    db = W['di'][:m]**2 + W['di'][m:2 * m]**2
    dbi = div(1.0, db)
    # dt = I - inv(Wl1)*Dbi*inv(Wl1)
    dt = 1.0 - mul(W['di'][:m]**2, dbi)
    dtsqrt = sqrt(dt)
    # lam = Dt*inv(Wl1)*d
    lam = mul(dt, mul(W['di'][:m], d))
    # lt = E'*inv(Wl1)*lam
    lt = matrix(0.0, (k, 1))
    base.gemv(E, mul(W['di'][:m], lam), lt, trans='T')
    # Xs = sqrt(Dt)*inv(Wl1)*X
    tmp = mul(dtsqrt, W['di'][:m])
    Xs = spmatrix(tmp, range(m), range(m)) * X
    # Es = D*sqrt(Dt)*inv(Wl1)*E
    Es = spmatrix(mul(d, tmp), range(m), range(m)) * E

    # form Ab = I + sum((1/bi)^2*(Pi'*Pi + 4*(v'*v + 1)*Pi'*y*y'*Pi)) + Xs'*Xs
    # and Bb = -sum((1/bi)^2*(4*ui*v'*v*Pi'*y*ei')) - Xs'*Es
    # and D2 = Es'*Es + sum((1/bi)^2*(1+4*ui^2*(v'*v - 1))
    Ab = matrix(0.0, (n, n))
    Ab[::n + 1] = 1.0
    base.syrk(Xs, Ab, trans='T', beta=1.0)
    Bb = matrix(0.0, (n, k))
    Bb = -Xs.T * Es  # inefficient!?
    D2 = spmatrix(0.0, range(k), range(k))
    base.syrk(Es, D2, trans='T', partial=True)
    d2 = +D2.V
    del D2
    py = matrix(0.0, (n, 1))
    for i in range(k):
        # Rank-structured contribution of the ith second-order cone
        # scaling (beta_i, v_i) to Ab, Bb and the diagonal d2.
        binvsq = (1.0 / W['beta'][i])**2
        Ab += binvsq * Pt[i]
        dvv = blas.dot(W['v'][i], W['v'][i])
        blas.gemv(P[i], W['v'][i][1:], py, trans='T', alpha=1.0, beta=0.0)
        blas.syrk(py, Ab, alpha=4 * binvsq * (dvv + 1), beta=1.0)
        Bb[:, i] -= 4 * binvsq * W['v'][i][0] * dvv * py
        d2[i] += binvsq * (1 + 4 * (W['v'][i][0]**2) * (dvv - 1))
    d2i = div(1.0, d2)
    d2isqrt = sqrt(d2i)

    # compute a = alpha - lam'*inv(Wl1)*E*inv(D2)*E'*inv(Wl1)*lam
    alpha = blas.dot(lam, mul(W['di'][:m], d))
    tmp = matrix(0.0, (k, 1))
    base.gemv(E, mul(W['di'][:m], lam), tmp, trans='T')
    tmp = mul(tmp, d2isqrt)  #tmp = inv(D2)^(1/2)*E'*inv(Wl1)*lam
    a = alpha - blas.dot(tmp, tmp)

    # compute M12 = X'*D*inv(Wl1)*lam + Bb*inv(D2)*E'*inv(Wl1)*lam
    tmp = mul(tmp, d2isqrt)
    M12 = matrix(0.0, (n, 1))
    blas.gemv(Bb, tmp, M12, alpha=1.0)
    tmp = mul(d, mul(W['di'][:m], lam))
    blas.gemv(X, tmp, M12, trans='T', alpha=1.0, beta=1.0)

    # form and factor the Schur complement matrix M
    sBb = Bb * spmatrix(d2isqrt, range(k), range(k))
    base.syrk(sBb, Ab, alpha=-1.0, beta=1.0)
    M = matrix([[Ab, M12.T], [M12, a]])
    lapack.potrf(M)

    def f(x, y, z):
        # Solve the KKT system in place: x holds (dwt, db, dv) and z
        # holds (dzl1, dzl2, dz1, ..., dzk) on exit (scaled by W).

        # residuals
        rwt = x[:n + k]
        rb = x[n + k]
        rv = x[n + k + 1:n + k + 1 + m]
        iw_rl1 = mul(W['di'][:m], z[:m])
        iw_rl2 = mul(W['di'][m:2 * m], z[m:2 * m])
        ri = [
            z[2 * m + i * (n + 1):2 * m + (i + 1) * (n + 1)]
            for i in range(k)
        ]

        # compute 'derived' residuals
        # rbwt = rwt + sum(Ai'*inv(Wi)^2*ri) + [-X'*D; E']*inv(Wl1)^2*rl1
        rbwt = +rwt
        for i in range(k):
            tmp = +ri[i]
            qscal(tmp, W['beta'][i], W['v'][i], inv=True)
            qscal(tmp, W['beta'][i], W['v'][i], inv=True)
            rbwt[n + i] -= tmp[0]
            blas.gemv(P[i], tmp[1:], rbwt, trans='T', alpha=-1.0, beta=1.0)
        tmp = mul(W['di'][:m], iw_rl1)
        tmp2 = matrix(0.0, (k, 1))
        base.gemv(E, tmp, tmp2, trans='T')
        rbwt[n:] += tmp2
        tmp = mul(d, tmp)  # tmp = D*inv(Wl1)^2*rl1
        blas.gemv(X, tmp, rbwt, trans='T', alpha=-1.0, beta=1.0)

        # rbb = rb - d'*inv(Wl1)^2*rl1
        rbb = rb - sum(tmp)

        # rbv = rv - inv(Wl2)*rl2 - inv(Wl1)^2*rl1
        rbv = rv - mul(W['di'][m:2 * m], iw_rl2) - mul(W['di'][:m], iw_rl1)

        # [rtw;rtt] = rbwt + [-X'*D; E']*inv(Wl1)^2*inv(Db)*rbv
        tmp = mul(W['di'][:m]**2, mul(dbi, rbv))
        rtt = +rbwt[n:]
        base.gemv(E, tmp, rtt, trans='T', alpha=1.0, beta=1.0)
        rtw = +rbwt[:n]
        tmp = mul(d, tmp)
        blas.gemv(X, tmp, rtw, trans='T', alpha=-1.0, beta=1.0)

        # rtb = rbb - d'*inv(Wl1)^2*inv(Db)*rbv
        rtb = rbb - sum(tmp)

        # solve M*[dw;db] = [rtw - Bb*inv(D2)*rtt; rtb + lt'*inv(D2)*rtt]
        tmp = mul(d2i, rtt)
        tmp2 = matrix(0.0, (n, 1))
        blas.gemv(Bb, tmp, tmp2)
        dwdb = matrix([rtw - tmp2, rtb + blas.dot(mul(d2i, lt), rtt)])
        lapack.potrs(M, dwdb)

        # compute dt = inv(D2)*(rtt - Bb'*dw + lt*db)
        tmp2 = matrix(0.0, (k, 1))
        blas.gemv(Bb, dwdb[:n], tmp2, trans='T')
        dt = mul(d2i, rtt - tmp2 + lt * dwdb[-1])

        # compute dv = inv(Db)*(rbv + inv(Wl1)^2*(E*dt - D*X*dw - d*db))
        dv = matrix(0.0, (m, 1))
        blas.gemv(X, dwdb[:n], dv, alpha=-1.0)
        dv = mul(d, dv) - d * dwdb[-1]
        base.gemv(E, dt, dv, beta=1.0)
        tmp = +dv  # tmp = E*dt - D*X*dw - d*db
        dv = mul(dbi, rbv + mul(W['di'][:m]**2, dv))

        # compute wdz1 = inv(Wl1)*(E*dt - D*X*dw - d*db - dv - rl1)
        wdz1 = mul(W['di'][:m], tmp - dv) - iw_rl1

        # compute wdz2 = - inv(Wl2)*(dv + rl2)
        wdz2 = -mul(W['di'][m:2 * m], dv) - iw_rl2

        # compute wdzi = inv(Wi)*([-ei'*dt; -Pi*dw] - ri)
        wdzi = []
        tmp = matrix(0.0, (n, 1))
        for i in range(k):
            blas.gemv(P[i], dwdb[:n], tmp, alpha=-1.0, beta=0.0)
            tmp1 = matrix([-dt[i], tmp])
            blas.axpy(ri[i], tmp1, alpha=-1.0)
            qscal(tmp1, W['beta'][i], W['v'][i], inv=True)
            wdzi.append(tmp1)

        # store the solution back into x and z
        x[:n] = dwdb[:n]
        x[n:n + k] = dt
        x[n + k] = dwdb[-1]
        x[n + k + 1:] = dv
        z[:m] = wdz1
        z[m:2 * m] = wdz2
        for i in range(k):
            z[2 * m + i * (n + 1):2 * m + (i + 1) * (n + 1)] = wdzi[i]

    return f
def solve(A, b, C, L, dims, proxqp=None, sigma=1.0, rho=1.0, **kwargs):
    """
    Solves the SDP

        min.  < c, x >
        s.t.  A(x) = b
              x >= 0

    and its dual

        max.  -< b, y >
        s.t.  s >= 0.
              c + A'(y) = s

    Input arguments.

        A is an N x M sparse matrix where N = sum_i ns[i]**2 and
        M = sum_j ms[j], and ns and ms are the SDP variable sizes and
        constraint block lengths respectively.  The expression A(x) = b
        can be written as A.T*xtilde = b, where xtilde is a stacked
        vector of vectorized versions of xi.

        b is a stacked vector containing constraint vectors of
        size m_i x 1.

        C is a stacked vector containing vectorized 'd' matrices c_k of
        size n_k**2 x 1, representing symmetric matrices.

        L is an N x P sparse matrix, where L.T*X = 0 represents the
        consistency constraints.  If an index k appears in different
        cliques i, j, and in converted form is indexed by it, jt, then
        L[it,l] = 1, L[jt,l] = -1 for some l.

        dims is a dictionary containing conic dimensions.
            dims['l'] contains the number of linear variables under
                nonnegativity constraint
            dims['q'] contains a list of quadratic cone orders (not
                implemented!)
            dims['s'] contains a list of semidefinite cone matrix orders

        proxqp is either a function pointer to a prox implementation,
        or, if the problem has block-diagonal correlative sparsity, a
        pointer to the prox implementation of a single clique.  The
        choices are:
            proxqp_general    : solves prox for general sparsity pattern
            proxqp_clique     : solves prox for a single dense clique
                                with only semidefinite variables
            proxqp_clique_SNL : solves prox for sensor network
                                localization problem

        sigma is a nonnegative constant (step size).

        rho is a nonnegative constant between 0 and 2 (overrelaxation
        parameter).

    In addition, the following parameters are optional:

        maxiter : maximum number of iterations (default 100)

        reltol : relative tolerance (default 0.01).  If rp < reltol and
            rd < reltol and iteration < maxiter, solver breaks and
            returns current value.

        adaptive : boolean toggle on whether adaptive step size should
            be used.  (default False)

        mu, tau, tauscale : parameters for adaptive step size
            (see paper)

        multiprocess : number of parallel processes (default 1).
            If multiprocess = 1, no parallelization is used.

        blockdiagonal : boolean toggle on whether problem has block
            diagonal correlative sparsity.  Note that even if the
            problem does have block-diagonal correlative sparsity, if
            this parameter is set to False, then general mode is used.
            (default False)

        verbose : toggle printout (default True)

        log_cputime : toggle whether cputime should be logged.

    The output is returned in a dictionary with the following fields:

        x : primal variable in stacked form (X = [x0, ..., x_{N-1}])
            where xk is the vectorized form of the nk x nk submatrix
            variable.

        y, z : iterates in Spingarn's method

        cputime, walltime : total cputime and walltime, respectively,
            spent in the main loop.  If log_cputime is False, then
            cputime is returned as 0.

        primal, rprimal, rdual : evolution of primal optimal value,
            primal residual, and dual residual (resp.)

        sigma : evolution of step size sigma (changes if adaptive step
            size is used.)
    """
    # NOTE(review): this function uses Python 2 print statements and
    # xrange, unlike the print() calls elsewhere in the file -- confirm
    # the intended Python version before porting.
    solvers.options['show_progress'] = False
    maxiter = kwargs.get('maxiter', 100)
    reltol = kwargs.get('reltol', 0.01)
    adaptive = kwargs.get('adaptive', False)
    mu = kwargs.get('mu', 2.0)
    tau = kwargs.get('tau', 1.5)
    multiprocess = kwargs.get('multiprocess', 1)
    tauscale = kwargs.get('tauscale', 0.9)
    blockdiagonal = kwargs.get('blockdiagonal', False)
    verbose = kwargs.get('verbose', True)
    log_cputime = kwargs.get('log_cputime', True)

    if log_cputime:
        try:
            import psutil
        except (ImportError):
            # NOTE(review): assert is stripped under 'python -O'; a plain
            # raise ImportError(...) would be safer here.
            assert False, "Python package psutil required to log cputime. Package can be downloaded at http://code.google.com/p/psutil/"

    #format variables: drop the linear part and split A, b per clique
    nl, ns = dims['l'], dims['s']
    C = C[nl:]
    L = L[nl:, :]
    As, bs = [], []
    cons = []
    offset = 0
    for k in xrange(len(ns)):
        # Columns of A touching the kth semidefinite block.
        Atmp = sparse(A[nl + offset:nl + offset + ns[k]**2, :])
        J = list(set(list(Atmp.J)))
        Atmp = Atmp[:, J]
        # Store dense if the block is full, sparse otherwise.
        if len(sparse(Atmp).V) == Atmp[:].size[0]:
            Atmp = matrix(Atmp)
        else:
            Atmp = sparse(Atmp)
        As.append(Atmp)
        bs.append(b[J])
        cons.append(J)
        offset += ns[k]**2

    if blockdiagonal:
        # Blocks sharing constraints contradict block-diagonal sparsity.
        if sum([len(c) for c in cons]) > len(b):
            print "Problem does not have block-diagonal correlative sparsity. Switching to general mode."
            blockdiagonal = False

    #If not block-diagonal correlative sparsity, represent A as a list of lists:
    # A[i][j] is a matrix (or spmatrix) if ith clique involves jth constraint block
    #Otherwise, A is a list of matrices, where A[i] involves the ith clique and
    #ith constraint block only.
    if not blockdiagonal:
        # Refine the constraint partition until the index sets in cons
        # are pairwise disjoint.
        while sum([len(c) for c in cons]) > len(b):
            tobreak = False
            for i in xrange(len(cons)):
                for j in xrange(i):
                    ci, cj = set(cons[i]), set(cons[j])
                    s1 = ci.intersection(cj)
                    if len(s1) > 0:
                        s2 = ci.difference(cj)
                        s3 = cj.difference(ci)
                        cons.append(list(s1))
                        if len(s2) > 0:
                            s2 = list(s2)
                            if not (s2 in cons):
                                cons.append(s2)
                        if len(s3) > 0:
                            s3 = list(s3)
                            if not (s3 in cons):
                                cons.append(s3)
                        cons.pop(i)
                        cons.pop(j)
                        tobreak = True
                        break
                if tobreak:
                    break
        # Rebuild As (list of lists) and bs for the refined partition.
        As, bs = [], []
        for i in xrange(len(cons)):
            J = cons[i]
            bs.append(b[J])
            Acol = []
            offset = 0
            for k in xrange(len(ns)):
                Atmp = sparse(A[nl + offset:nl + offset + ns[k]**2, J])
                if len(Atmp.V) == 0:
                    # Clique k does not touch constraint block i.
                    Acol.append(0)
                elif len(Atmp.V) == Atmp[:].size[0]:
                    Acol.append(matrix(Atmp))
                else:
                    Acol.append(Atmp)
                offset += ns[k]**2
            As.append(Acol)
    ms = [len(i) for i in bs]
    bs = matrix(bs)
    meq = L.size[1]

    if (not blockdiagonal) and multiprocess > 1:
        print "Multiprocessing mode can only be used if correlative sparsity is block diagonal. Switching to sequential mode."
        multiprocess = 1

    assert rho > 0 and rho < 2, 'Overrelaxaton parameter (rho) must be (strictly) between 0 and 2'

    # create routine for projecting on { x | L*x = 0 }
    #{ x | L*x = 0 } -> P = I - L*(L.T*L)i *L.T
    LTL = spmatrix([], [], [], (meq, meq))
    offset = 0
    for k in ns:
        Lk = L[offset:offset + k**2, :]
        base.syrk(Lk, LTL, trans='T', beta=1.0)
        offset += k**2
    # Sparse Cholesky factorization of L'*L (AMD ordering).
    LTLi = cholmod.symbolic(LTL, amd.order(LTL))
    cholmod.numeric(LTL, LTLi)
    #y = y - L*LTLi*L.T*y
    nssq = sum(matrix([nsk**2 for nsk in ns]))

    def proj(y, ip=True):
        # Orthogonal projection onto { x | L'*x = 0 }; in place if ip.
        if not ip:
            y = +y
        tmp = matrix(0.0, size=(meq, 1))
        ypre = +y
        base.gemv(L, y, tmp, trans='T', m=nssq, n=meq, beta=1)
        cholmod.solve(LTLi, tmp)
        base.gemv(L, tmp, y, beta=1.0, alpha=-1.0, trans='N', m=nssq, n=meq)
        if not ip:
            return y

    time_to_solve = 0

    #initialize variables
    X = C * 0.0
    Y = +X
    Z = +X
    dualS = +X
    dualy = +b
    PXZ = +X

    proxargs = {
        'C': C,
        'A': As,
        'b': bs,
        'Z': Z,
        'X': X,
        'sigma': sigma,
        'dualS': dualS,
        'dualy': dualy,
        'ns': ns,
        'ms': ms,
        'multiprocess': multiprocess
    }

    if blockdiagonal:
        proxqp = proxqp_blockdiagonal(proxargs, proxqp)
    else:
        proxqp = proxqp_general

    if log_cputime:
        utime = psutil.cpu_times()[0]
    wtime = time.time()
    primal = []
    rpvec, rdvec = [], []
    sigmavec = []
    for it in xrange(maxiter):
        # Prox step: updates X (and duals) in proxargs in place.
        pv, gap = proxqp(proxargs)

        # Y := proj(Z - 2*X) onto the consistency subspace.
        blas.copy(Z, Y)
        blas.axpy(X, Y, alpha=-2.0)
        proj(Y, ip=True)

        #PXZ = sigma*(X-Z)
        blas.copy(X, PXZ)
        blas.scal(sigma, PXZ)
        blas.axpy(Z, PXZ, alpha=-sigma)

        #z = z + rho*(y-x)
        blas.axpy(X, Y, alpha=1.0)
        blas.axpy(Y, Z, alpha=-rho)

        xzn = blas.nrm2(PXZ)
        xn = blas.nrm2(X)
        xyn = blas.nrm2(Y)
        proj(PXZ, ip=True)
        rdual = blas.nrm2(PXZ)
        rpri = sqrt(abs(xyn**2 - rdual**2)) / sigma

        if log_cputime:
            cputime = psutil.cpu_times()[0] - utime
        else:
            cputime = 0
        walltime = time.time() - wtime

        # Stop when both relative residuals are below tolerance.
        if rpri / max(xn, 1.0) < reltol and rdual / max(1.0, xzn) < reltol:
            break
        rpvec.append(rpri / max(xn, 1.0))
        rdvec.append(rdual / max(1.0, xzn))
        primal.append(pv)
        if adaptive:
            # Balance primal/dual residuals by scaling sigma (see paper).
            if (rdual / xzn * mu < rpri / xn):
                sigmanew = sigma * tau
            elif (rpri / xn * mu < rdual / xzn):
                sigmanew = sigma / tau
            else:
                sigmanew = sigma
            if it % 10 == 0 and it > 0 and tau > 1.0:
                tauscale *= 0.9
                tau = 1 + (tau - 1) * tauscale
            sigma = max(min(sigmanew, 10.0), 0.1)
            sigmavec.append(sigma)
        if verbose:
            if log_cputime:
                print "%d: primal = %e, gap = %e, (rp,rd) = (%e,%e), sigma = %f, (cputime,walltime) = (%f, %f)" % (it, pv, gap, rpri / max(xn, 1.0), rdual / max(1.0, xzn), sigma, cputime, walltime)
            else:
                print "%d: primal = %e, gap = %e, (rp,rd) = (%e,%e), sigma = %f, walltime = %f" % (it, pv, gap, rpri / max(xn, 1.0), rdual / max(1.0, xzn), sigma, walltime)

    sol = {}
    sol['x'] = X
    sol['y'] = Y
    sol['z'] = Z
    sol['cputime'] = cputime
    sol['walltime'] = walltime
    sol['primal'] = primal
    sol['rprimal'] = rpvec
    sol['rdual'] = rdvec
    sol['sigma'] = sigmavec
    return sol
def factor(W, H=None, Df=None):
    """
    Factor the reduced KKT system for the current scaling W and return a
    solve() routine bound to that factorization.

    First the Schur complement S = Gs'*Gs (+ Dfs'*Dfs) (+ H) is formed and
    factored (dense Cholesky via LAPACK, or sparse via CHOLMOD); then
    K = Asct'*Asct with Asct = L^{-1}*(P*)A' is factored for the equality
    constraints.

    NOTE(review): relies on closure variables F, G, A, mnl, ml, n, p and
    scale() from the enclosing kktsolver definition, which is not visible
    in this chunk -- confirm against the enclosing scope.
    """
    if F['firstcall']:
        # Allocate the scaled work matrices once, matching the storage
        # (dense/sparse) of G and Df.
        if type(G) is matrix:
            F['Gs'] = matrix(0.0, G.size)
        else:
            F['Gs'] = spmatrix(0.0, G.I, G.J, G.size)
        if mnl:
            if type(Df) is matrix:
                F['Dfs'] = matrix(0.0, Df.size)
            else:
                F['Dfs'] = spmatrix(0.0, Df.I, Df.J, Df.size)
        if (mnl and type(Df) is matrix) or type(G) is matrix or \
            type(H) is matrix:
            # Any dense operand forces a dense Schur complement S.
            F['S'] = matrix(0.0, (n, n))
            F['K'] = matrix(0.0, (p, p))
        else:
            F['S'] = spmatrix([], [], [], (n, n), 'd')
            F['Sf'] = None
            if type(A) is matrix:
                F['K'] = matrix(0.0, (p, p))
            else:
                F['K'] = spmatrix([], [], [], (p, p), 'd')

    # Dfs = Wnl^{-1} * Df
    if mnl:
        base.gemm(spmatrix(W['dnli'], list(range(mnl)), list(range(mnl))),
                  Df, F['Dfs'], partial=True)
    # Gs = Wl^{-1} * G.
    base.gemm(spmatrix(W['di'], list(range(ml)), list(range(ml))),
              G, F['Gs'], partial=True)

    if F['firstcall']:
        # S = Gs'*Gs (+ Dfs'*Dfs) (+ H), then factor.
        base.syrk(F['Gs'], F['S'], trans='T')
        if mnl:
            base.syrk(F['Dfs'], F['S'], trans='T', beta=1.0)
        if H is not None:
            F['S'] += H
        try:
            if type(F['S']) is matrix:
                lapack.potrf(F['S'])
            else:
                F['Sf'] = cholmod.symbolic(F['S'])
                cholmod.numeric(F['S'], F['Sf'])
        except ArithmeticError:
            # S is singular: regularize by adding A'*A and refactor.
            F['singular'] = True
            if type(A) is matrix and type(F['S']) is spmatrix:
                F['S'] = matrix(0.0, (n, n))
            base.syrk(F['Gs'], F['S'], trans='T')
            if mnl:
                base.syrk(F['Dfs'], F['S'], trans='T', beta=1.0)
            base.syrk(A, F['S'], trans='T', beta=1.0)
            if H is not None:
                F['S'] += H
            if type(F['S']) is matrix:
                lapack.potrf(F['S'])
            else:
                F['Sf'] = cholmod.symbolic(F['S'])
                cholmod.numeric(F['S'], F['Sf'])
        F['firstcall'] = False
    else:
        # Subsequent calls: refresh only the existing sparsity pattern
        # (partial=True) and reuse the symbolic factorization.
        base.syrk(F['Gs'], F['S'], trans='T', partial=True)
        if mnl:
            base.syrk(F['Dfs'], F['S'], trans='T', beta=1.0,
                      partial=True)
        if H is not None:
            F['S'] += H
        if F['singular']:
            base.syrk(A, F['S'], trans='T', beta=1.0, partial=True)
        if type(F['S']) is matrix:
            lapack.potrf(F['S'])
        else:
            cholmod.numeric(F['S'], F['Sf'])

    if type(F['S']) is matrix:
        # Asct := L^{-1}*A'. Factor K = Asct'*Asct.
        if type(A) is matrix:
            Asct = A.T
        else:
            Asct = matrix(A.T)
        blas.trsm(F['S'], Asct)
        blas.syrk(Asct, F['K'], trans='T')
        lapack.potrf(F['K'])
    else:
        # Asct := L^{-1}*P*A'. Factor K = Asct'*Asct.
        if type(A) is matrix:
            Asct = A.T
            cholmod.solve(F['Sf'], Asct, sys=7)
            cholmod.solve(F['Sf'], Asct, sys=4)
            blas.syrk(Asct, F['K'], trans='T')
            lapack.potrf(F['K'])
        else:
            Asct = cholmod.spsolve(F['Sf'], A.T, sys=7)
            Asct = cholmod.spsolve(F['Sf'], Asct, sys=4)
            base.syrk(Asct, F['K'], trans='T')
            # Kf exists only in the sparse/sparse branch; solve() below
            # uses it exactly when F['K'] is sparse.
            Kf = cholmod.symbolic(F['K'])
            cholmod.numeric(F['K'], Kf)

    def solve(x, y, z):
        """Solve the scaled KKT system in place; on return x, y, z hold
        ux, uy, W*uz."""
        # Solve
        #
        #     [ H          A'   GG'*W^{-1} ]   [ ux   ]   [ bx        ]
        #     [ A          0    0          ] * [ uy   ] = [ by        ]
        #     [ W^{-T}*GG  0   -I          ]   [ W*uz ]   [ W^{-T}*bz ]
        #
        # and return ux, uy, W*uz.
        #
        # If not F['singular']:
        #
        #     K*uy = A * S^{-1} * ( bx + GG'*W^{-1}*W^{-T}*bz ) - by
        #     S*ux = bx + GG'*W^{-1}*W^{-T}*bz - A'*uy
        #     W*uz = W^{-T} * ( GG*ux - bz ).
        #
        # If F['singular']:
        #
        #     K*uy = A * S^{-1} * ( bx + GG'*W^{-1}*W^{-T}*bz + A'*by )
        #            - by
        #     S*ux = bx + GG'*W^{-1}*W^{-T}*bz + A'*by - A'*y.
        #     W*uz = W^{-T} * ( GG*ux - bz ).

        # z := W^{-1} * z = W^{-1} * bz
        scale(z, W, trans='T', inverse='I')

        # If not F['singular']:
        #     x := L^{-1} * P * (x + GGs'*z)
        #        = L^{-1} * P * (x + GG'*W^{-1}*W^{-T}*bz)
        #
        # If F['singular']:
        #     x := L^{-1} * P * (x + GGs'*z + A'*y))
        #        = L^{-1} * P * (x + GG'*W^{-1}*W^{-T}*bz + A'*y)
        if mnl:
            base.gemv(F['Dfs'], z, x, trans='T', beta=1.0)
        base.gemv(F['Gs'], z, x, offsetx=mnl, trans='T', beta=1.0)
        if F['singular']:
            base.gemv(A, y, x, trans='T', beta=1.0)
        if type(F['S']) is matrix:
            blas.trsv(F['S'], x)
        else:
            cholmod.solve(F['Sf'], x, sys=7)
            cholmod.solve(F['Sf'], x, sys=4)

        # y := K^{-1} * (Asc*x - y)
        #    = K^{-1} * (A * S^{-1} * (bx + GG'*W^{-1}*W^{-T}*bz) - by)
        #      (if not F['singular'])
        #    = K^{-1} * (A * S^{-1} * (bx + GG'*W^{-1}*W^{-T}*bz +
        #      A'*by) - by)
        #      (if F['singular']).
        base.gemv(Asct, x, y, trans='T', beta=-1.0)
        if type(F['K']) is matrix:
            lapack.potrs(F['K'], y)
        else:
            cholmod.solve(Kf, y)

        # x := P' * L^{-T} * (x - Asc'*y)
        #    = S^{-1} * (bx + GG'*W^{-1}*W^{-T}*bz - A'*y)
        #      (if not F['singular'])
        #    = S^{-1} * (bx + GG'*W^{-1}*W^{-T}*bz + A'*by - A'*y)
        #      (if F['singular'])
        base.gemv(Asct, y, x, alpha=-1.0, beta=1.0)
        if type(F['S']) is matrix:
            blas.trsv(F['S'], x, trans='T')
        else:
            cholmod.solve(F['Sf'], x, sys=5)
            cholmod.solve(F['Sf'], x, sys=8)

        # W*z := GGs*x - z = W^{-T} * (GG*x - bz)
        if mnl:
            base.gemv(F['Dfs'], x, z, beta=-1.0)
        base.gemv(F['Gs'], x, z, beta=-1.0, offsety=mnl)

    return solve
def F(W):
    """
    Custom solver for the system

    [ It  0  0  Xt'      0       At1' ... Atk' ][ dwt  ]   [ rwt ]
    [ 0   0  0  -d'      0       0    ... 0    ][ db   ]   [ rb  ]
    [ 0   0  0  -I       -I      0    ... 0    ][ dv   ]   [ rv  ]
    [ Xt -d -I  -Wl1^-2                        ][ dzl1 ]   [ rl1 ]
    [ 0   0 -I           -Wl2^-2               ][ dzl2 ] = [ rl2 ]
    [ At1 0  0                   -W1^-2        ][ dz1  ]   [ r1  ]
    [  |  |  |                         .       ][  |   ]   [  |  ]
    [ Atk 0  0                          -Wk^-2 ][ dzk  ]   [ rk  ]

    where

    It = [ I 0 ]    Xt = [ -D*X  E ]    Ati = [ 0   -e_i' ]
         [ 0 0 ]                              [ -Pi  0    ]

    dwt = [ dw ]    rwt = [ rw ]
          [ dt ]          [ rt ].

    NOTE(review): uses closure variables m, n, k, d, E, X, P, Pt and
    qscal() from the enclosing scope (not visible in this chunk) --
    confirm against the enclosing definition.
    """
    # scalings and 'intermediate' vectors
    # db = inv(Wl1)^2 + inv(Wl2)^2
    db = W['di'][:m]**2 + W['di'][m:2*m]**2
    dbi = div(1.0,db)

    # dt = I - inv(Wl1)*Dbi*inv(Wl1)
    dt = 1.0 - mul(W['di'][:m]**2,dbi)
    dtsqrt = sqrt(dt)

    # lam = Dt*inv(Wl1)*d
    lam = mul(dt,mul(W['di'][:m],d))

    # lt = E'*inv(Wl1)*lam
    lt = matrix(0.0,(k,1))
    base.gemv(E, mul(W['di'][:m],lam), lt, trans = 'T')

    # Xs = sqrt(Dt)*inv(Wl1)*X
    tmp = mul(dtsqrt,W['di'][:m])
    Xs = spmatrix(tmp,range(m),range(m))*X

    # Es = D*sqrt(Dt)*inv(Wl1)*E
    Es = spmatrix(mul(d,tmp),range(m),range(m))*E

    # form Ab = I + sum((1/bi)^2*(Pi'*Pi + 4*(v'*v + 1)*Pi'*y*y'*Pi)) + Xs'*Xs
    #  and Bb = -sum((1/bi)^2*(4*ui*v'*v*Pi'*y*ei')) - Xs'*Es
    #  and D2 = Es'*Es + sum((1/bi)^2*(1+4*ui^2*(v'*v - 1))
    Ab = matrix(0.0,(n,n))
    Ab[::n+1] = 1.0
    base.syrk(Xs,Ab,trans = 'T', beta = 1.0)
    Bb = matrix(0.0,(n,k))
    Bb = -Xs.T*Es  # inefficient!?
    D2 = spmatrix(0.0,range(k),range(k))
    base.syrk(Es,D2,trans = 'T', partial = True)
    d2 = +D2.V
    del D2
    py = matrix(0.0,(n,1))
    # Accumulate per-cone (second-order cone scaling) contributions.
    for i in range(k):
        binvsq = (1.0/W['beta'][i])**2
        Ab += binvsq*Pt[i]
        dvv = blas.dot(W['v'][i],W['v'][i])
        blas.gemv(P[i], W['v'][i][1:], py, trans = 'T', alpha = 1.0, beta = 0.0)
        blas.syrk(py, Ab, alpha = 4*binvsq*(dvv+1), beta = 1.0)
        Bb[:,i] -= 4*binvsq*W['v'][i][0]*dvv*py
        d2[i] += binvsq*(1+4*(W['v'][i][0]**2)*(dvv-1))
    d2i = div(1.0,d2)
    d2isqrt = sqrt(d2i)

    # compute a = alpha - lam'*inv(Wl1)*E*inv(D2)*E'*inv(Wl1)*lam
    alpha = blas.dot(lam,mul(W['di'][:m],d))
    tmp = matrix(0.0,(k,1))
    base.gemv(E,mul(W['di'][:m],lam), tmp, trans = 'T')
    tmp = mul(tmp, d2isqrt)   #tmp = inv(D2)^(1/2)*E'*inv(Wl1)*lam
    a = alpha - blas.dot(tmp,tmp)

    # compute M12 = X'*D*inv(Wl1)*lam + Bb*inv(D2)*E'*inv(Wl1)*lam
    tmp = mul(tmp, d2isqrt)
    M12 = matrix(0.0,(n,1))
    blas.gemv(Bb,tmp,M12, alpha = 1.0)
    tmp = mul(d,mul(W['di'][:m],lam))
    blas.gemv(X,tmp,M12, trans = 'T', alpha = 1.0, beta = 1.0)

    # form and factor M (Schur complement of the reduced system)
    sBb = Bb * spmatrix(d2isqrt,range(k), range(k))
    base.syrk(sBb, Ab, alpha = -1.0, beta = 1.0)
    M = matrix([[Ab, M12.T],[M12, a]])
    lapack.potrf(M)

    def f(x,y,z):
        """Solve one KKT system in place using the factorization above;
        on return x holds (dw, dt, db, dv) and z the scaled dz blocks."""
        # residuals
        rwt = x[:n+k]
        rb = x[n+k]
        rv = x[n+k+1:n+k+1+m]
        iw_rl1 = mul(W['di'][:m],z[:m])
        iw_rl2 = mul(W['di'][m:2*m],z[m:2*m])
        ri = [z[2*m+i*(n+1):2*m+(i+1)*(n+1)] for i in range(k)]

        # compute 'derived' residuals
        # rbwt = rwt + sum(Ai'*inv(Wi)^2*ri) + [-X'*D; E']*inv(Wl1)^2*rl1
        rbwt = +rwt
        for i in range(k):
            tmp = +ri[i]
            # apply inv(Wi)^2 via two successive inverse scalings
            qscal(tmp,W['beta'][i],W['v'][i],inv=True)
            qscal(tmp,W['beta'][i],W['v'][i],inv=True)
            rbwt[n+i] -= tmp[0]
            blas.gemv(P[i], tmp[1:], rbwt, trans = 'T', alpha = -1.0, beta = 1.0)
        tmp = mul(W['di'][:m],iw_rl1)
        tmp2 = matrix(0.0,(k,1))
        base.gemv(E,tmp,tmp2,trans='T')
        rbwt[n:] += tmp2
        tmp = mul(d,tmp)  # tmp = D*inv(Wl1)^2*rl1
        blas.gemv(X,tmp,rbwt,trans='T', alpha = -1.0, beta = 1.0)

        # rbb = rb - d'*inv(Wl1)^2*rl1
        rbb = rb - sum(tmp)

        # rbv = rv - inv(Wl2)*rl2 - inv(Wl1)^2*rl1
        rbv = rv - mul(W['di'][m:2*m],iw_rl2) - mul(W['di'][:m],iw_rl1)

        # [rtw;rtt] = rbwt + [-X'*D; E']*inv(Wl1)^2*inv(Db)*rbv
        tmp = mul(W['di'][:m]**2, mul(dbi,rbv))
        rtt = +rbwt[n:]
        base.gemv(E, tmp, rtt, trans = 'T', alpha = 1.0, beta = 1.0)
        rtw = +rbwt[:n]
        tmp = mul(d,tmp)
        blas.gemv(X, tmp, rtw, trans = 'T', alpha = -1.0, beta = 1.0)

        # rtb = rbb - d'*inv(Wl1)^2*inv(Db)*rbv
        rtb = rbb - sum(tmp)

        # solve M*[dw;db] = [rtw - Bb*inv(D2)*rtt; rtb + lt'*inv(D2)*rtt]
        tmp = mul(d2i,rtt)
        tmp2 = matrix(0.0,(n,1))
        blas.gemv(Bb,tmp,tmp2)
        dwdb = matrix([rtw - tmp2,rtb + blas.dot(mul(d2i,lt),rtt)])
        lapack.potrs(M,dwdb)

        # compute dt = inv(D2)*(rtt - Bb'*dw + lt*db)
        tmp2 = matrix(0.0,(k,1))
        blas.gemv(Bb, dwdb[:n], tmp2, trans='T')
        dt = mul(d2i, rtt - tmp2 + lt*dwdb[-1])

        # compute dv = inv(Db)*(rbv + inv(Wl1)^2*(E*dt - D*X*dw - d*db))
        dv = matrix(0.0,(m,1))
        blas.gemv(X,dwdb[:n],dv,alpha = -1.0)
        dv = mul(d,dv) - d*dwdb[-1]
        base.gemv(E, dt, dv, beta = 1.0)
        tmp = +dv  # tmp = E*dt - D*X*dw - d*db
        dv = mul(dbi, rbv + mul(W['di'][:m]**2,dv))

        # compute wdz1 = inv(Wl1)*(E*dt - D*X*dw - d*db - dv - rl1)
        wdz1 = mul(W['di'][:m], tmp - dv) - iw_rl1

        # compute wdz2 = - inv(Wl2)*(dv + rl2)
        wdz2 = - mul(W['di'][m:2*m],dv) - iw_rl2

        # compute wdzi = inv(Wi)*([-ei'*dt; -Pi*dw] - ri)
        wdzi = []
        tmp = matrix(0.0,(n,1))
        for i in range(k):
            blas.gemv(P[i],dwdb[:n],tmp, alpha = -1.0, beta = 0.0)
            tmp1 = matrix([-dt[i],tmp])
            blas.axpy(ri[i],tmp1,alpha = -1.0)
            qscal(tmp1,W['beta'][i],W['v'][i],inv=True)
            wdzi.append(tmp1)

        # solution: write the computed steps back into x and z in place
        x[:n] = dwdb[:n]
        x[n:n+k] = dt
        x[n+k] = dwdb[-1]
        x[n+k+1:] = dv
        z[:m] = wdz1
        z[m:2*m] = wdz2
        for i in range(k):
            z[2*m+i*(n+1):2*m+(i+1)*(n+1)] = wdzi[i]

    return f
def solve_phase1(self,kktsolver='chol',MM = 1e5):
    """
    Solves primal Phase I problem using the feasible start solver.

    Returns primal feasible X.

    Return value is a pair (X, sol): (X0, None) when the least-norm
    solution is already feasible, (None, P1) when the Phase I problem
    certifies infeasibility, and (X0, sol) otherwise.
    """
    from cvxopt import cholmod, amd
    k = 1e-3  # small margin used when shifting the Phase I solution

    # compute Schur complement matrix
    Id = [i*(self.n+1) for i in range(self.n)]  # linear indices of the diagonal
    As = self._A[:,1:]
    As[Id,:] /= sqrt(2.0)

    M = spmatrix([],[],[],(self.m,self.m))
    base.syrk(As,M,trans='T')
    u = +self.b

    # compute least-norm solution
    F = cholmod.symbolic(M)
    cholmod.numeric(M,F)
    cholmod.solve(F,u)
    x = 0.5*self._A[:,1:]*u
    X0 = spmatrix(x[self.V[:].I],self.V.I,self.V.J,(self.n,self.n))

    # test feasibility
    p = amd.order(self.V)
    #Xc,Nf = chompack.embed(X0,p)
    #E = chompack.project(Xc,spmatrix(1.0,range(self.n),range(self.n)))
    symb = chompack.symbolic(self.V,p)
    Xc = chompack.cspmatrix(symb) + X0
    try:
        # L = chompack.completion(Xc)
        L = Xc.copy()
        chompack.completion(L)
        # least-norm solution is feasible
        return X0,None
    except:
        # NOTE(review): bare except -- presumably catches the error raised
        # by chompack.completion when no PSD completion exists; verify the
        # exact exception type against the chompack version in use.
        pass

    # create Phase I SDP object
    trA = matrix(0.0,(self.m+1,1))
    e = matrix(1.0,(self.n,1))
    Aa = self._A[Id,1:]
    base.gemv(Aa,e,trA,trans='T')
    trA[-1] = MM
    P1 = SDP()
    P1._A = misc.phase1_sdp(self._A,trA)
    P1._b = matrix([self.b-k*trA[:self.m],MM])
    P1._agg_sparsity()

    # find feasible starting point for Phase I problem
    # (search for t such that Xc + t*I has a PSD completion)
    tMIN = 0.0
    tMAX = 1.0
    while True:
        t = (tMIN+tMAX)/2.0
        #Xt = chompack.copy(Xc)
        #chompack.axpy(E,Xt,t)
        Xt = Xc.copy() + spmatrix(t,list(range(self.n)),list(range(self.n)))
        try:
            # L = chompack.completion(Xt)
            L = Xt.copy()
            chompack.completion(L)
            tMAX = t
            if tMAX - tMIN < 1e-1: break
        except:
            tMAX *= 2.0
            tMIN = t

    tt = t + 1.0
    U = X0 + spmatrix(tt,list(range(self.n,)),list(range(self.n)))
    trU = sum(U[:][Id])

    # starting point for the augmented Phase I variable
    Z0 = spdiag([U,spmatrix([tt+k,MM-trU],[0,1],[0,1],(2,2))])
    sol = P1.solve_feas(primalstart = {'x':Z0}, kktsolver = kktsolver)

    s = sol['x'][-2,-2] - k
    if s > 0:
        # Phase I optimum positive: no strictly feasible point exists
        return None,P1
    else:
        sol.pop('y')
        sol.pop('s')
        X0 = sol.pop('x')[:self.n,:self.n]\
            - spmatrix(s,list(range(self.n)),list(range(self.n)))
        return X0,sol
def solve_phase1(self, kktsolver='chol', MM=1e5):
    """
    Solves primal Phase I problem using the feasible start solver.

    Parameters:
      kktsolver : KKT solver name forwarded to solve_feas.
      MM        : big-M constant bounding the trace in the Phase I SDP.

    Returns the pair (X, sol):
      (X0, None) -- the least-norm solution is already primal feasible;
      (None, P1) -- Phase I certifies infeasibility (P1 is the Phase I
                    SDP object);
      (X0, sol)  -- a strictly feasible X0 recovered from the Phase I
                    solution 'sol'.
    """
    from cvxopt import cholmod, amd
    k = 1e-3  # small margin used when shifting the Phase I solution

    # Compute Schur complement matrix M = As'*As, with the diagonal
    # entries of the constraint matrix rescaled by 1/sqrt(2).
    Id = [i * (self.n + 1) for i in range(self.n)]  # diagonal indices
    As = self._A[:, 1:]
    As[Id, :] /= sqrt(2.0)
    M = spmatrix([], [], [], (self.m, self.m))
    base.syrk(As, M, trans='T')
    u = +self.b

    # Compute least-norm solution via sparse Cholesky factorization.
    F = cholmod.symbolic(M)
    cholmod.numeric(M, F)
    cholmod.solve(F, u)
    x = 0.5 * self._A[:, 1:] * u
    X0 = spmatrix(x[self.V[:].I], self.V.I, self.V.J, (self.n, self.n))

    # Test feasibility: X0 is feasible iff it has a PSD completion.
    p = amd.order(self.V)
    symb = chompack.symbolic(self.V, p)
    Xc = chompack.cspmatrix(symb) + X0
    try:
        L = Xc.copy()
        chompack.completion(L)
        # Least-norm solution is feasible. Return a 2-tuple, consistent
        # with the (None, P1) and (X0, sol) exits below -- previously this
        # path returned a bare X0, which broke callers unpacking 2 values.
        return X0, None
    except:
        # NOTE(review): bare except kept -- the exact exception raised by
        # chompack.completion on an incompletable matrix is not visible
        # here; verify against the chompack version in use.
        pass

    # Create Phase I SDP object.
    trA = matrix(0.0, (self.m + 1, 1))
    e = matrix(1.0, (self.n, 1))
    Aa = self._A[Id, 1:]
    base.gemv(Aa, e, trA, trans='T')
    trA[-1] = MM
    P1 = SDP()
    P1._A = misc.phase1_sdp(self._A, trA)
    P1._b = matrix([self.b - k * trA[:self.m], MM])
    P1._agg_sparsity()

    # Find a feasible starting point for the Phase I problem: search for
    # a shift t such that Xc + t*I has a PSD completion.
    tMIN = 0.0
    tMAX = 1.0
    while True:
        t = (tMIN + tMAX) / 2.0
        Xt = Xc.copy() + spmatrix(t, range(self.n), range(self.n))
        try:
            L = Xt.copy()
            chompack.completion(L)
            tMAX = t
            if tMAX - tMIN < 1e-1:
                break
        except:
            tMAX *= 2.0
            tMIN = t

    tt = t + 1.0
    U = X0 + spmatrix(tt, range(self.n), range(self.n))
    trU = sum(U[:][Id])

    # Starting point for the augmented Phase I variable.
    Z0 = spdiag([U, spmatrix([tt + k, MM - trU], [0, 1], [0, 1], (2, 2))])
    sol = P1.solve_feas(primalstart={'x': Z0}, kktsolver=kktsolver)

    s = sol['x'][-2, -2] - k
    if s > 0:
        # Phase I optimum positive: no strictly feasible point exists.
        return None, P1
    else:
        sol.pop('y')
        sol.pop('s')
        X0 = sol.pop('x')[:self.n, :self.n]\
            - spmatrix(s, range(self.n), range(self.n))
        return X0, sol