def _compute(self, params, solver_args={}):
    params = [p.numpy() for p in params]

    # Infer whether the parameters are batched: an extra leading
    # dimension relative to the declared parameter shape means a batch.
    nbatch = (0 if len(params[0].shape) == len(self.params[0].shape)
              else params[0].shape[0])

    if nbatch > 0:
        split_params = [
            [np.squeeze(p) for p in np.split(param, nbatch)]
            for param in params]
        params_per_problem = [
            [param_list[i] for param_list in split_params]
            for i in range(nbatch)]
        As, bs, cs = zip(*[
            self._problem_data_from_params(p) for p in params_per_problem])
        xs, _, ss, _, DT = diffcp.solve_and_derivative_batch(
            As=As, bs=bs, cs=cs, cone_dicts=[self.cones] * nbatch,
            **solver_args)
        DT = self._restrict_DT_to_dx(DT, nbatch, ss[0].shape)
        solns = [self._split_solution(x) for x in xs]
        # solution[i] is a tensor with first dimension equal to nbatch,
        # holding the optimal values for variable i
        solution = [
            tf.stack([s[i] for s in solns]) for i in range(self.n_vars)]
    else:
        A, b, c = self._problem_data_from_params(params)
        x, _, s, _, DT = diffcp.solve_and_derivative(
            A=A, b=b, c=c, cone_dict=self.cones, **solver_args)
        DT = self._restrict_DT_to_dx(DT, nbatch, s.shape)
        solution = self._split_solution(x)

    def gradient_function(*dsoln):
        if nbatch > 0:
            # split the batched dsoln tensors into lists, with one list
            # corresponding to each problem in the batch
            dsoln_lists = [[] for _ in range(nbatch)]
            for value in dsoln:
                tensors = tf.split(value, nbatch)
                for dsoln_list, t in zip(dsoln_lists, tensors):
                    dsoln_list.append(tf.squeeze(t))
            dxs = [self._dx_from_dsoln(dsoln_list)
                   for dsoln_list in dsoln_lists]
            dAs, dbs, dcs = DT(dxs)
            dparams_dict_unbatched = [
                self.asa_maps.apply_param_jac(dc, -dA, db)
                for (dA, db, dc) in zip(dAs, dbs, dcs)]
            dparams = []
            for p in self.params:
                dparams.append(tf.constant(
                    [d[p.id] for d in dparams_dict_unbatched]))
            return dparams
        else:
            dx = self._dx_from_dsoln(dsoln)
            dA, db, dc = DT(dx)
            dparams_dict = self.asa_maps.apply_param_jac(dc, -dA, db)
            return tuple(
                tf.constant(dparams_dict[p.id]) for p in self.params)

    return solution, gradient_function
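The (solution, gradient_function) pair returned by _compute is consumed by TensorFlow's custom-gradient machinery. A minimal sketch of that wiring (not cvxpylayers itself; the quadratic toy_solve stands in for the cone solve, and its closure stands in for DT):

import tensorflow as tf

@tf.custom_gradient
def toy_solve(param):
    solution = param ** 2          # forward pass: stand-in for the cone solve
    def gradient_function(dsoln):  # backward pass: stand-in for the adjoint DT
        return 2.0 * param * dsoln
    return solution, gradient_function

p = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(p)
    out = toy_solve(p)
print(tape.gradient(out, p))  # tf.Tensor(6.0, ...)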
def cvgm_glasso(samples, alpha0, K=10, p=0.7, tol=1e-4, max_iters=20,
                gradient_update=gradient_descent_update):
    nsamples = samples.shape[0]
    num_training = int(p * nsamples)

    # Build K random train/test splits, each using a fraction p of the
    # samples for training.
    training_masks = [np.zeros(nsamples, dtype=bool) for _ in range(K)]
    for mask in training_masks:
        mask[:num_training] = True
        np.random.shuffle(mask)
    training_datasets = [samples[mask] for mask in training_masks]
    test_datasets = [samples[~mask] for mask in training_masks]

    diff = float('inf')
    curr_alpha = alpha0
    num_iter = 0
    while diff > tol and num_iter < max_iters:
        num_iter += 1
        gradients = []
        for training_data, test_data in zip(training_datasets, test_datasets):
            cov_train = np.cov(training_data, rowvar=False)
            dim = cov_train.shape[1]  # renamed from p to avoid shadowing the train fraction
            theta_size = dim * (dim + 1) // 2

            # write program in correct form and run it
            A, b, c, cone_dict = write_glasso_cone_program(cov_train, curr_alpha)
            x, y, s, derivative, adjoint_derivative = solve_and_derivative(
                A, b, c, cone_dict)
            theta = _vec2symmetric(x[:theta_size], dim)
            print("=====")
            print(f"Training loss: {np.sum(cov_train * theta) - np.log(np.linalg.det(theta))}")
            cov_test = np.cov(test_data, rowvar=False)
            print(f"Test loss: {np.sum(cov_test * theta) - np.log(np.linalg.det(theta))}")

            # Gradient of the test loss through the cone program:
            # dl/dalpha = <dl/dtheta, dtheta/dalpha>.
            dl_dtheta = cov_test - np.linalg.inv(theta)
            dc = np.zeros(c.shape)
            dc[-theta_size:] = 1
            dx, dy, ds = derivative(np.zeros(A.shape), np.zeros(b.shape), dc)
            dtheta_dalpha = _vec2symmetric(dx[:theta_size], dim)
            dl_dalpha = np.sum(dl_dtheta * dtheta_dalpha)
            gradients.append(dl_dalpha)
        print(f"gradients: {gradients}")

        # update alpha, projecting onto the nonnegative reals
        alpha_grad = np.mean(gradients)
        diff = np.linalg.norm(alpha_grad)
        curr_alpha = gradient_update(curr_alpha, alpha_grad)
        curr_alpha = max(curr_alpha, 0)
        print(alpha_grad, curr_alpha)
    return curr_alpha
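cvgm_glasso takes the update rule as a parameter. A minimal sketch of a compatible gradient_descent_update, where the step size lr is an assumption:

def gradient_descent_update(alpha, grad, lr=0.5):
    # one fixed-step gradient descent step; lr=0.5 is an assumed default
    return alpha - lr * grad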
def main(n=3, p=3):
    # Generate problem data
    C = randn_psd(n)
    As = [randn_symm(n) for _ in range(p)]
    Bs = np.random.randn(p)

    # Extract problem data using cvxpy
    X = cp.Variable((n, n), PSD=True)
    objective = cp.trace(C @ X)
    constraints = [cp.trace(As[i] @ X) == Bs[i] for i in range(p)]
    prob = cp.Problem(cp.Minimize(objective), constraints)
    A, b, c, cone_dims = scs_data_from_cvxpy_problem(prob)

    # Print problem size. Note that A.size is the number of nonzeros for a
    # sparse matrix, so the derivative is (nnz + m + n) x (n + 2m).
    nnz_plus_m_plus_n = A.size + b.size + c.size
    n_plus_2m = c.size + 2 * b.size
    entries_in_derivative = nnz_plus_m_plus_n * n_plus_2m
    print(f"n={n}, p={p}, A.shape={A.shape}, nnz in A={A.nnz}, "
          f"derivative={nnz_plus_m_plus_n}x{n_plus_2m} "
          f"({entries_in_derivative} entries)")

    # Compute solution and derivative maps
    start = time.perf_counter()
    x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
        A, b, c, cone_dims, eps=1e-5)
    end = time.perf_counter()
    print("Compute solution and set up derivative: %.2f s." % (end - start))

    # Adjoint of derivative (maps dx, dy, ds to dA, db, dc)
    lsqr_args = dict(atol=1e-5, btol=1e-5)
    start = time.perf_counter()
    dA, db, dc = adjoint_derivative(
        diffcp.cones.vec_symm(C), np.zeros(y.size), np.zeros(s.size),
        **lsqr_args)
    end = time.perf_counter()
    print("Evaluate adjoint of derivative: %.2f s." % (end - start))

    # Derivative (maps dA, db, dc to dx, dy, ds)
    start = time.perf_counter()
    dx, dy, ds = derivative(A, b, c, **lsqr_args)
    end = time.perf_counter()
    print("Evaluate derivative: %.2f s." % (end - start))
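Both main functions assume randn_psd and randn_symm helpers. Plausible definitions, sketched here since the originals live elsewhere in the repo (scaling conventions may differ):

def randn_symm(n):
    # random symmetric matrix
    A = np.random.randn(n, n)
    return (A + A.T) / 2

def randn_psd(n):
    # random positive semidefinite matrix
    A = np.random.randn(n, n)
    return A @ A.T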
def main(n=3, p=3):
    # Generate problem data
    C = randn_psd(n)
    As = [randn_symm(n) for _ in range(p)]
    Bs = np.random.randn(p)

    # Extract problem data using cvxpy
    X = cp.Variable((n, n), PSD=True)
    objective = cp.trace(C @ X)
    constraints = [cp.trace(As[i] @ X) == Bs[i] for i in range(p)]
    prob = cp.Problem(cp.Minimize(objective), constraints)
    A, b, c, cone_dims = scs_data_from_cvxpy_problem(prob)

    # Hard-coded override of the extracted cone dimensions; note that
    # {'f': 25, 's': [50]} corresponds to p=25, n=50, not the defaults above.
    cone_dims = {'f': 25, 's': [50]}
    print(cone_dims)

    # Compute solution and derivative maps
    print(A.shape, b.shape, c.shape, cone_dims)
    x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
        A, b, c, cone_dims, eps=1e-5)
    return dict(A=A, b=b, c=c), cone_dims
def solve_and_time(folder, cone_dict, num_programs):
    print(f"Solving programs in {folder}")
    burn_in = 2  # discard the first few iterations to warm up
    solve_times = np.zeros(num_programs - burn_in)
    deriv_times = np.zeros(num_programs - burn_in)
    adjoint_times = np.zeros(num_programs - burn_in)
    for program_num in trange(num_programs):
        program = load_cone_program(f"{folder}/{program_num}.txt")
        A, b, c = program["A"], program["b"], program["c"]
        A = csc_matrix(A)

        # Time the solve (plus derivative setup).
        start = perf_counter()
        x, y, s, D, DT = diffcp.solve_and_derivative(
            A, b, c, cone_dict, eps=1e-5)
        if program_num >= burn_in:
            solve_times[program_num - burn_in] = perf_counter() - start

        # Time one derivative evaluation.
        start = perf_counter()
        D(np.zeros(A.shape), np.zeros(b.shape), np.ones(c.shape))
        if program_num >= burn_in:
            deriv_times[program_num - burn_in] = perf_counter() - start

        # Time one adjoint evaluation.
        start = perf_counter()
        DT(np.ones(x.shape), np.zeros(y.shape), np.ones(s.shape))
        if program_num >= burn_in:
            adjoint_times[program_num - burn_in] = perf_counter() - start

    print(f"Solving took an average of {np.mean(solve_times)} seconds")
    print(f"Derivatives took an average of {np.mean(deriv_times)} seconds")
    print(f"Adjoints took an average of {np.mean(adjoint_times)} seconds")
    np.savetxt(f"{folder}_diffcp_solve_times.txt", solve_times)
    np.savetxt(f"{folder}_diffcp_deriv_times.txt", deriv_times)
    np.savetxt(f"{folder}_diffcp_adjoint_times.txt", adjoint_times)
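solve_and_time and the script below rely on load_cone_program and save_cone_program. One hypothetical implementation, sketched for completeness; the repo's actual helpers and on-disk format may differ (the dense flag is accepted but ignored here):

import pickle

def save_cone_program(path, program, dense=False):
    # serialize the dict of problem data; dense is kept only for
    # call-site compatibility in this sketch
    with open(path, "wb") as f:
        pickle.dump(program, f)

def load_cone_program(path):
    with open(path, "rb") as f:
        return pickle.load(f)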
K = {
    'f': 3,   # ZERO
    'l': 3,   # POS
    'q': [5]  # SOC
}
m = 3 + 3 + 5
n = 5

np.random.seed(11)
program = random_cone_prog(m, n, K)
A, b, c = program["A"], program["b"], program["c"]

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, K, solve_method="SCS", verbose=False)
save_cone_program(
    "test_programs/scs_test_program.txt",
    program=dict(A=A, b=b, c=c, x_star=x, y_star=y, s_star=s),
    dense=False)

print("x =", x)
print("y =", y)
print("s =", s)

dx, dy, ds = derivative(A, b, c)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m), atol=1e-10,
                                btol=1e-10)
# We generate a random cone program with a cone
# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
# and a 5-d second-order cone.
K = {'f': 3, 'l': 3, 'q': [5]}
m = 3 + 3 + 5
n = 5

np.random.seed(0)
program = random_cone_prog(m, n, K)
A, b, c = program["A"], program["b"], program["c"]

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, K, eps=1e-10)

print("x =", x)
print("y =", y)
print("s =", s)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m), atol=1e-10,
                                btol=1e-10)

# The gradient of the objective with respect to b should be
# equal to minus the dual variable y (see, e.g., page 268 of
# Convex Optimization by Boyd & Vandenberghe).
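That identity can be checked numerically; the tolerance below is an assumption, not part of the original example:

np.testing.assert_allclose(db, -y, atol=1e-6)
print("db matches -y up to 1e-6")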
# We generate a random cone program with a cone
# defined as a product of a 3-d zero cone, 3-d positive orthant cone,
# and a 5-d second-order cone.
K = {
    'f': 3,
    'l': 3,
    'q': [5]
}
m = 3 + 3 + 5
n = 5

np.random.seed(0)
A, b, c = utils.random_cone_prog(m, n, K)

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, K, solve_method="ECOS", verbose=False)

print("x =", x)
print("y =", y)
print("s =", s)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m),
                                atol=1e-10, btol=1e-10)

# The gradient of the objective with respect to b should be
# equal to minus the dual variable y (see, e.g., page 268 of
# Convex Optimization by Boyd & Vandenberghe).
print("db =", db)
print("-y =", -y)
def derivative(A, b, c, cone_dict, **kwargs):
    r"""
    Solves the problem

        minimize    \|(Ax + s - b, A^T y + c, c^T x + b^T y)\|_2    (1)
        subject to  s in K, y in K^*,

    and computes the derivative of the objective with respect to A, b,
    and c. The objective of this problem is 0 if and only if (A, b, c, K)
    form a non-pathological cone program.

    Args:
        - A: m x n matrix
        - b: m vector
        - c: n vector
        - cone_dict: dict representing K, in SCS format

    Returns:
        - dA: m x n matrix with same sparsity pattern as A.
              Derivative of objective with respect to A.
        - db: m vector. Derivative of objective with respect to b.
        - dc: n vector. Derivative of objective with respect to c.
        - objective: Objective value of (1).
        - x: n vector. Solution to (1).
        - y: m vector. Solution to (1).
        - s: m vector. Solution to (1).
    """
    m, n = A.shape
    expected_m = dims_from_cone_dict(cone_dict)
    assert m == expected_m, "A has %d rows, but should have %d rows." % (
        m, expected_m)
    assert b.size == m
    assert c.size == n

    dual_cone_dict = dual_cone(cone_dict)
    num_free = delete_free_cones(dual_cone_dict)

    # Second-order-cone block encoding the objective of (1).
    Asoc = sparse.bmat([[-1.0, None, None, None],
                        [None, -A, None, -sparse.eye(m)],
                        [None, None, -A.T, None],
                        [None, -c[None, :], -b[None, :], None]])
    Ahat = sparse.bmat(
        [[sparse.csc_matrix((m, 1)), sparse.csc_matrix((m, n)), None,
          -sparse.eye(m)],
         [sparse.csc_matrix((m, 1)), sparse.csc_matrix((m, n)),
          -sparse.eye(m), None]], format='csc')

    # Reformat (Asoc, Ahat) while combining the cones in cone_dict
    # and dual_cone_dict
    pd_cone_dict = {}
    idx = 0
    idx_dual = num_free + m
    mats = []
    inserted_soc_cone = False
    for cone in CONES:
        for dual in [False, True]:
            if cone in [ZERO, POS, EXP, EXP_DUAL]:  # integer cones
                num_cone = (dual_cone_dict if dual else cone_dict).get(cone, 0)
                dim = num_cone
                if cone == EXP or cone == EXP_DUAL:
                    dim *= 3
                if num_cone > 0:
                    pd_cone_dict[cone] = pd_cone_dict.get(cone, 0) + num_cone
                    if not dual:
                        mats.append(Ahat[idx:idx + dim, :])
                        idx += dim
                    else:
                        mats.append(Ahat[idx_dual:idx_dual + dim, :])
                        idx_dual += dim
            else:  # list cones
                if cone == SOC and not inserted_soc_cone:
                    pd_cone_dict[SOC] = [Asoc.shape[0]]
                    mats.append(Asoc)
                    idx_first_soc_cone = idx + idx_dual - m - num_free
                    inserted_soc_cone = True
                cone_list = (dual_cone_dict if dual else cone_dict).get(
                    cone, [])
                dim = sum(cone_list)
                if cone == PSD:
                    dim = sum([vec_psd_dim(c) for c in cone_list])
                if len(cone_list) > 0:
                    pd_cone_dict[cone] = pd_cone_dict.get(cone, []) + cone_list
                    if not dual:
                        mats.append(Ahat[idx:idx + dim, :])
                        idx += dim
                    else:
                        mats.append(Ahat[idx_dual:idx_dual + dim, :])
                        idx_dual += dim
    assert idx == m
    assert idx_dual == 2 * m

    # Prepare (Ahat, bhat, chat)
    Ahat = sparse.vstack(mats, format='csc')
    bhat = np.zeros(Ahat.shape[0])
    bhat[idx_first_soc_cone:idx_first_soc_cone + Asoc.shape[0]] = \
        np.concatenate([np.zeros(1), -b, c, np.zeros(1)])
    chat = np.append(1.0, np.zeros(Ahat.shape[1] - 1))

    # Solve problem and extract optimal value and solution
    x_internal, y_internal, s_internal, _, DT = diffcp.solve_and_derivative(
        Ahat, bhat, chat, pd_cone_dict, **kwargs)
    objective = x_internal[0]
    x = x_internal[1:1 + n]
    y = x_internal[1 + n:1 + n + m]
    s = x_internal[1 + n + m:1 + n + 2 * m]

    # Compute derivatives with respect to Ahat, bhat
    dAhat, dbhat, _ = DT(chat, np.zeros(bhat.size), np.zeros(bhat.size))

    # Extract derivatives with respect to A, b, c from Ahat and bhat
    dAsoc = dAhat[idx_first_soc_cone:idx_first_soc_cone + Asoc.shape[0], :]
    last_row_dAsoc = np.array(dAsoc[-1, :].todense()).ravel()
    dA = -dAsoc[1:1 + m, 1:1 + n] - dAsoc[1 + m:1 + m + n, 1 + n:1 + m + n].T
    db = -last_row_dAsoc[1 + n:1 + n + m]
    db -= dbhat[idx_first_soc_cone + 1:idx_first_soc_cone + 1 + m]
    dc = -last_row_dAsoc[1:1 + n]
    dc += dbhat[idx_first_soc_cone + 1 + m:idx_first_soc_cone + 1 + m + n]
    return dA, db, dc, objective, x, y, s, (x_internal, y_internal, s_internal)
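A hypothetical invocation, reusing the random_cone_prog helper from the examples above; the cone keys and sizes are illustrative:

K = {'f': 3, 'l': 3, 'q': [5]}
program = random_cone_prog(3 + 3 + 5, 5, K)
dA, db, dc, objective, x, y, s, _ = derivative(
    program["A"], program["b"], program["c"], K)
# objective is 0 (up to solver tolerance) exactly when the
# program is non-pathological
print("distance to pathology:", objective)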
import numpy as np
from scipy import sparse

import diffcp
from py_utils.random_program import random_cone_prog

cone_dict = {diffcp.ZERO: 3, diffcp.POS: 3, diffcp.SOC: [5]}
m = 3 + 3 + 5
n = 5

# random_cone_prog returns a dict of problem data, as in the examples above
program = random_cone_prog(m, n, cone_dict)
A, b, c = program["A"], program["b"], program["c"]
x, y, s, D, DT = diffcp.solve_and_derivative(A, b, c, cone_dict)

# evaluate the derivative
nonzeros = A.nonzero()
data = 1e-4 * np.random.randn(A.size)
dA = sparse.csc_matrix((data, nonzeros), shape=A.shape)
db = 1e-4 * np.random.randn(m)
dc = 1e-4 * np.random.randn(n)
dx, dy, ds = D(dA, db, dc)

# evaluate the adjoint of the derivative
dx = c
dy = np.zeros(m)
ds = np.zeros(m)
dA, db, dc = DT(dx, dy, ds)
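An illustrative first-order sanity check, not part of the original example: re-solve the perturbed program and compare against the derivative's prediction.

# a fresh small perturbation, built the same way as above
dA_p = sparse.csc_matrix((1e-4 * np.random.randn(A.size), A.nonzero()),
                         shape=A.shape)
db_p = 1e-4 * np.random.randn(m)
dc_p = 1e-4 * np.random.randn(n)
dx_p, _, _ = D(dA_p, db_p, dc_p)
x_pert, _, _, _, _ = diffcp.solve_and_derivative(
    A + dA_p, b + db_p, c + dc_p, cone_dict)
# the error should be much smaller than the perturbation size of 1e-4
print(np.linalg.norm(x_pert - (x + dx_p)))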
import numpy as np
import sympy
from sympy import Matrix

import diffcp
import scs

a = np.random.normal(size=(100, 3))
S = np.cov(a, rowvar=False)
A, b, c, cone_dict = write_glasso_cone_program(S, 1.)

# Numerically inspect one block of the constraint matrix.
x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
print(_vec2symmetric((A @ x)[1:22], 6))

# Symbolically inspect the constraints and objective.
x = Matrix(sympy.symbols([
    "theta11", "theta21", "theta31", "theta22", "theta32", "theta33",
    "z11", "z22", "z33", "z21", "z31", "z32",
    "t1", "t2", "t3", "t"
]))
print(Matrix(b) - Matrix(A.toarray()) @ x)
print(Matrix(c).T @ x)

sol = diffcp.solve_and_derivative(A, b, c, cone_dict)

K = np.linalg.inv(S)
sol = scs.solve(dict(A=A, b=b, c=c), cone_dict, eps=1e-15, max_iters=10000,
                verbose=True, acceleration_lookback=1)
x = sol["x"]
p = S.shape[0]

# d = int(S.shape[0] * (S.shape[0] + 1) / 2)
# theta = _vec2symmetric(x[:d], p)
# print(theta[:3, :3])
# print(np.linalg.inv(S)[:3, :3])
# print(np.linalg.norm(theta - np.linalg.inv(S), "fro"))

# theta = [1, 2, 3, 4, 5, 6]
# z = [7, 8, 9, 10, 11, 12]
# x = np.array([*theta, *z])
# A = merge_psd_plus_diag(len(theta), len(z), 3)
# merged = A @ x