Example #1
    def _compute(self, params, solver_args={}):
        params = [p.numpy() for p in params]
        nbatch = (0 if len(params[0].shape) == len(self.params[0].shape)
                  else params[0].shape[0])

        if nbatch > 0:
            split_params = [[np.squeeze(p) for p in np.split(param, nbatch)]
                            for param in params]
            params_per_problem = [
                [param_list[i] for param_list in split_params]
                for i in range(nbatch)]
            As, bs, cs = zip(*[
                self._problem_data_from_params(p) for p in params_per_problem])
            xs, _, ss, _, DT = diffcp.solve_and_derivative_batch(
                As=As, bs=bs, cs=cs, cone_dicts=[self.cones] * nbatch,
                **solver_args)
            DT = self._restrict_DT_to_dx(DT, nbatch, ss[0].shape)
            solns = [self._split_solution(x) for x in xs]
            # soln[i] is a tensor with first dimension equal to nbatch, holding
            # the optimal values for variable i
            solution = [
                tf.stack([s[i] for s in solns]) for i in range(self.n_vars)]
        else:
            A, b, c = self._problem_data_from_params(params)
            x, _, s, _, DT = diffcp.solve_and_derivative(
                A=A, b=b, c=c, cone_dict=self.cones, **solver_args)
            DT = self._restrict_DT_to_dx(DT, nbatch, s.shape)
            solution = self._split_solution(x)

        def gradient_function(*dsoln):
            if nbatch > 0:
                # split the batched dsoln tensors into lists, with one list
                # corresponding to each problem in the batch
                dsoln_lists = [[] for _ in range(nbatch)]
                for value in dsoln:
                    tensors = tf.split(value, nbatch)
                    for dsoln_list, t in zip(dsoln_lists, tensors):
                        dsoln_list.append(tf.squeeze(t))
                dxs = [self._dx_from_dsoln(dsoln_list)
                       for dsoln_list in dsoln_lists]
                dAs, dbs, dcs = DT(dxs)
                dparams_dict_unbatched = [
                    self.asa_maps.apply_param_jac(dc, -dA, db) for
                    (dA, db, dc) in zip(dAs, dbs, dcs)]
                dparams = []
                for p in self.params:
                    dparams.append(
                        tf.constant([d[p.id] for d in dparams_dict_unbatched]))
                return dparams
            else:
                dx = self._dx_from_dsoln(dsoln)
                dA, db, dc = DT(dx)
                dparams_dict = self.asa_maps.apply_param_jac(dc, -dA, db)
                return tuple(tf.constant(
                    dparams_dict[p.id]) for p in self.params)
        return solution, gradient_function
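The (solution, gradient_function) pair returned by _compute is exactly the contract expected by tf.custom_gradient. Below is a minimal sketch of that wiring, assuming layer is an instance of the class defining _compute above; the wrapper name is illustrative and not taken from the original source:

import tensorflow as tf

def call_layer(layer, *parameters, solver_args={}):
    # tf.custom_gradient consumes a callable that returns (outputs, grad_fn),
    # which is what _compute produces.
    compute = tf.custom_gradient(
        lambda *params: layer._compute(params, solver_args))
    return compute(*parameters)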
def cvgm_glasso(samples, alpha0, K=10, p=0.7, tol=1e-4, max_iters=20,
                gradient_update=gradient_descent_update):
    nsamples = samples.shape[0]
    num_training = int(p*nsamples)

    training_masks = [np.zeros(nsamples, dtype=bool) for _ in range(K)]
    for mask in training_masks:
        mask[:num_training] = True
        np.random.shuffle(mask)

    training_datasets = [samples[mask] for mask in training_masks]
    test_datasets = [samples[~mask] for mask in training_masks]

    diff = float('inf')
    curr_alpha = alpha0
    num_iter = 0
    while diff > tol and num_iter < max_iters:
        num_iter += 1
        gradients = []

        for training_data, test_data in zip(training_datasets, test_datasets):
            cov_train = np.cov(training_data, rowvar=False)
            dim = cov_train.shape[1]  # matrix dimension; avoids shadowing the train fraction p
            theta_size = int(dim * (dim + 1) / 2)

            # write program in correct form and run it
            A, b, c, cone_dict = write_glasso_cone_program(cov_train, curr_alpha)
            x, y, s, derivative, adjoint_derivative = solve_and_derivative(A, b, c, cone_dict)
            theta = _vec2symmetric(x[:theta_size], dim)

            print("=====")
            print(f"Training loss: {np.sum(cov_train * theta) - np.log(np.linalg.det(theta))}")

            cov_test = np.cov(test_data, rowvar=False)
            print(f"Test loss: {np.sum(cov_test * theta) - np.log(np.linalg.det(theta))}")
            dl_dtheta = cov_test - np.linalg.inv(theta)

            dc = np.zeros(c.shape)
            dc[-theta_size:] = 1
            dx, dy, ds = derivative(np.zeros(A.shape), np.zeros(b.shape), dc)
            dtheta_d_alpha = _vec2symmetric(dx[:theta_size], dim)

            dl_dalpha = np.sum(dl_dtheta * dtheta_d_alpha)
            gradients.append(dl_dalpha)
        print(f"gradients: {gradients}")

        # update alpha
        alpha_grad = np.mean(gradients)
        diff = np.linalg.norm(alpha_grad)
        curr_alpha = gradient_update(curr_alpha, alpha_grad)
        curr_alpha = max(curr_alpha, 0)

        print(alpha_grad, curr_alpha)

    # s = prob(samples, new_alpha)
    return curr_alpha
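cvgm_glasso takes a gradient_update callable, defaulting to gradient_descent_update, which is not defined in the snippet above. A minimal plain gradient-descent step that would fit that slot; the name, signature, and step size are assumptions made for illustration:

def gradient_descent_update(alpha, grad, lr=1e-2):
    # one fixed-step-size descent step on the held-out (test) loss
    return alpha - lr * grad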
Example #3
def main(n=3, p=3):
    # Generate problem data
    C = randn_psd(n)
    As = [randn_symm(n) for _ in range(p)]
    Bs = np.random.randn(p)

    # Extract problem data using cvxpy
    X = cp.Variable((n, n), PSD=True)
    objective = cp.trace(C @ X)
    constraints = [cp.trace(As[i] @ X) == Bs[i] for i in range(p)]
    prob = cp.Problem(cp.Minimize(objective), constraints)
    A, b, c, cone_dims = scs_data_from_cvxpy_problem(prob)

    # Print problem size
    mn_plus_m_plus_n = A.size + b.size + c.size
    n_plus_2m = c.size + 2 * b.size
    entries_in_derivative = mn_plus_m_plus_n * n_plus_2m
    print(
        f"n={n}, p={p}, A.shape={A.shape}, nnz in A={A.nnz}, "
        f"derivative={mn_plus_m_plus_n}x{n_plus_2m} "
        f"({entries_in_derivative} entries)"
    )

    # Compute solution and derivative maps
    start = time.perf_counter()
    x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
        A, b, c, cone_dims, eps=1e-5)
    end = time.perf_counter()
    print("Compute solution and set up derivative: %.2f s." % (end - start))

    # Adjoint of the derivative (maps (dx, dy, ds) to (dA, db, dc))
    lsqr_args = dict(atol=1e-5, btol=1e-5)
    start = time.perf_counter()
    dA, db, dc = adjoint_derivative(diffcp.cones.vec_symm(C), np.zeros(y.size),
                                    np.zeros(s.size), **lsqr_args)
    end = time.perf_counter()
    print("Evaluate adjoint of derivative: %.2f s." % (end - start))

    # Derivative (maps (dA, db, dc) to (dx, dy, ds))
    start = time.perf_counter()
    dx, dy, ds = derivative(A, b, c, **lsqr_args)
    end = time.perf_counter()
    print("Evaluate derivative: %.2f s." % (end - start))
def main(n=3, p=3):
    # Generate problem data
    C = randn_psd(n)
    As = [randn_symm(n) for _ in range(p)]
    Bs = np.random.randn(p)

    # Extract problem data using cvxpy
    X = cp.Variable((n, n), PSD=True)
    objective = cp.trace(C @ X)
    constraints = [cp.trace(As[i] @ X) == Bs[i] for i in range(p)]
    prob = cp.Problem(cp.Minimize(objective), constraints)
    A, b, c, cone_dims = scs_data_from_cvxpy_problem(prob)
    cone_dims = {'f': 25, 's': [50]}
    print(cone_dims)

    # Compute solution and derivative maps
    print(A.shape, b.shape, c.shape, cone_dims)
    x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
        A, b, c, cone_dims, eps=1e-5)

    return dict(A=A, b=b, c=c), cone_dims
def solve_and_time(folder, cone_dict, num_programs):
    print(f"Solving programs in {folder}")

    burn_in = 2
    solve_times = np.zeros(num_programs - burn_in)
    deriv_times = np.zeros(num_programs - burn_in)
    adjoint_times = np.zeros(num_programs - burn_in)
    for program_num in trange(num_programs):
        program = load_cone_program(f"{folder}/{program_num}.txt")
        A, b, c = program["A"], program["b"], program["c"]
        A = csc_matrix(A)

        start = perf_counter()
        x, y, s, D, DT = diffcp.solve_and_derivative(A,
                                                     b,
                                                     c,
                                                     cone_dict,
                                                     eps=1e-5)
        if program_num >= burn_in:
            solve_times[program_num - burn_in] = perf_counter() - start

        start = perf_counter()
        D(np.zeros(A.shape), np.zeros(b.shape), np.ones(c.shape))
        if program_num >= burn_in:
            deriv_times[program_num - burn_in] = perf_counter() - start

        start = perf_counter()
        DT(np.ones(x.shape), np.zeros(y.shape), np.ones(s.shape))
        if program_num >= burn_in:
            adjoint_times[program_num - burn_in] = perf_counter() - start

    print(f"Solving took an average of {np.mean(solve_times)} seconds")
    print(f"Derivatives took an average of {np.mean(deriv_times)} seconds")
    print(f"Adjoints took an average of {np.mean(adjoint_times)} seconds")
    np.savetxt(f"{folder}_diffcp_solve_times.txt", solve_times)
    np.savetxt(f"{folder}_diffcp_deriv_times.txt", deriv_times)
    np.savetxt(f"{folder}_diffcp_adjoint_times.txt", adjoint_times)
Example #6
K = {
    'f': 3,  # ZERO
    'l': 3,  # POS
    'q': [5]  # SOC
}

m = 3 + 3 + 5
n = 5

np.random.seed(11)

program = random_cone_prog(m, n, K)
A, b, c = program["A"], program["b"], program["c"]

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, K, solve_method="SCS", verbose=False)
save_cone_program("test_programs/scs_test_program.txt",
                  program=dict(A=A, b=b, c=c, x_star=x, y_star=y, s_star=s),
                  dense=False)

print("x =", x)
print("y =", y)
print("s =", s)

dx, dy, ds = derivative(A, b, c)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c,
                                np.zeros(m),
                                np.zeros(m),
                                atol=1e-10,
                                btol=1e-10)

# We generate a random cone program with a cone
# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
# and a 5-d second order cone.
K = {'f': 3, 'l': 3, 'q': [5]}

m = 3 + 3 + 5
n = 5

np.random.seed(0)

program = random_cone_prog(m, n, K)
A, b, c = program["A"], program["b"], program["c"]

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, K, eps=1e-10)

print("x =", x)
print("y =", y)
print("s =", s)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c,
                                np.zeros(m),
                                np.zeros(m),
                                atol=1e-10,
                                btol=1e-10)

# The gradient of the objective with respect to b should be
# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
# Boyd & Vandenberghe).
Example #8
# We generate a random cone program with a cone
# defined as a product of a 3-d fixed cone, 3-d positive orthant cone,
# and a 5-d second order cone.
K = {
    'f': 3,
    'l': 3,
    'q': [5]
}

m = 3 + 3 + 5
n = 5

np.random.seed(0)

A, b, c = utils.random_cone_prog(m, n, K)

# We solve the cone program and get the derivative and its adjoint
x, y, s, derivative, adjoint_derivative = diffcp.solve_and_derivative(
    A, b, c, K, solve_method="ECOS", verbose=False)

print("x =", x)
print("y =", y)
print("s =", s)

# We evaluate the gradient of the objective with respect to A, b and c.
dA, db, dc = adjoint_derivative(c, np.zeros(m), np.zeros(m),
                                atol=1e-10, btol=1e-10)

# The gradient of the objective with respect to b should be
# equal to minus the dual variable y (see, e.g., page 268 of Convex Optimization by
# Boyd & Vandenberghe).
print("db =", db)
print("-y =", -y)
Example #9
def derivative(A, b, c, cone_dict, **kwargs):
    """
    Solves the problem

    minimize   \|(Ax+s-b,A^Ty+c,c^Tx+b^Ty)\|_2  (1)
    subject to s in K, y in K^*,

    and computes the derivative of the objective
    with respect to A, b, and c. The objective
    of this problem is 0 if and only if
    (A,b,c,K) form a non-pathological cone program.

    Args:
        - A: m x n matrix 
        - b: m vector
        - c: n vector
        - cone_dict: dict representing K,
            in SCS format

    Returns:
        - dA: m x n matrix with same sparsity pattern as A.
            Derivative of objective with respect to A.
        - db: m vector. Derivative of objective with respect to b.
        - dc: n vector. Derivative of objective with respect to c.
        - objective: Objective value of (1).
        - x: n vector. Solution to (1).
        - y: m vector. Solution to (1).
        - s: m vector. Solution to (1).
    """
    m, n = A.shape

    expected_m = dims_from_cone_dict(cone_dict)
    assert m == expected_m, "A has %d rows, but should have %d rows." % (
        m, expected_m)
    assert b.size == m
    assert c.size == n

    dual_cone_dict = dual_cone(cone_dict)
    num_free = delete_free_cones(dual_cone_dict)
    Asoc = sparse.bmat([[-1.0, None, None, None],
                        [None, -A, None, -sparse.eye(m)],
                        [None, None, -A.T, None],
                        [None, -c[None, :], -b[None, :], None]])
    Ahat = sparse.bmat([[
        sparse.csc_matrix((m, 1)),
        sparse.csc_matrix((m, n)), None, -sparse.eye(m)
    ],
                        [
                            sparse.csc_matrix((m, 1)),
                            sparse.csc_matrix((m, n)), -sparse.eye(m), None
                        ]],
                       format='csc')

    # Reformat (Asoc, Ahat) while combining the cones in cone_dict
    # and dual_cone_dict
    pd_cone_dict = {}
    idx = 0
    idx_dual = num_free + m
    mats = []
    inserted_soc_cone = False
    for cone in CONES:
        for dual in [False, True]:
            if cone in [ZERO, POS, EXP, EXP_DUAL]:  # integer cones
                num_cone = (dual_cone_dict if dual else cone_dict).get(cone, 0)
                dim = num_cone
                if cone == EXP or cone == EXP_DUAL:
                    dim *= 3
                if num_cone > 0:
                    pd_cone_dict[cone] = pd_cone_dict.get(cone, 0) + num_cone
                    if not dual:
                        mats.append(Ahat[idx:idx + dim, :])
                        idx += dim
                    else:
                        mats.append(Ahat[idx_dual:idx_dual + dim, :])
                        idx_dual += dim
            else:  # list cones
                if cone == SOC and not inserted_soc_cone:
                    pd_cone_dict[SOC] = [Asoc.shape[0]]
                    mats.append(Asoc)
                    idx_first_soc_cone = idx + idx_dual - m - num_free
                    inserted_soc_cone = True
                cone_list = (dual_cone_dict if dual else cone_dict).get(
                    cone, [])
                dim = sum(cone_list)
                if cone == PSD:
                    dim = sum([vec_psd_dim(c) for c in cone_list])
                if len(cone_list) > 0:
                    pd_cone_dict[cone] = pd_cone_dict.get(cone, []) + cone_list
                    if not dual:
                        mats.append(Ahat[idx:idx + dim, :])
                        idx += dim
                    else:
                        mats.append(Ahat[idx_dual:idx_dual + dim, :])
                        idx_dual += dim

    assert idx == m
    assert idx_dual == 2 * m

    # Prepare (Ahat, bhat, chat)
    Ahat = sparse.vstack(mats, format='csc')
    bhat = np.zeros(Ahat.shape[0])
    bhat[idx_first_soc_cone:idx_first_soc_cone + Asoc.shape[0]] = \
        np.concatenate([np.zeros(1), -b, c, np.zeros(1)])
    chat = np.append(1.0, np.zeros(Ahat.shape[1] - 1))

    # Solve problem and extract optimal value and solution
    x_internal, y_internal, s_internal, _, DT = diffcp.solve_and_derivative(
        Ahat, bhat, chat, pd_cone_dict, **kwargs)
    objective = x_internal[0]
    y = x_internal[1 + n:1 + n + m]
    s = x_internal[1 + n + m:1 + n + 2 * m]
    x = x_internal[1:1 + n]

    # Compute derivatives with respect to Ahat, bhat
    dAhat, dbhat, _ = DT(chat, np.zeros(bhat.size), np.zeros(bhat.size))

    # Extract derivatives with respect to A, b, c from Ahat and bhat
    dAsoc = dAhat[idx_first_soc_cone:idx_first_soc_cone + Asoc.shape[0], :]
    last_row_dAsoc = np.array(dAsoc[-1, :].todense()).ravel()
    dA = -dAsoc[1:1 + m, 1:1 + n] - dAsoc[1 + m:1 + m + n, 1 + n:1 + m + n].T
    db = -last_row_dAsoc[1 + n:1 + n + m]
    db -= dbhat[idx_first_soc_cone + 1:idx_first_soc_cone + 1 + m]
    dc = -last_row_dAsoc[1:1 + n]
    dc += dbhat[idx_first_soc_cone + 1 + m:idx_first_soc_cone + 1 + m + n]

    return dA, db, dc, objective, x, y, s, (x_internal, y_internal, s_internal)
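A hypothetical usage sketch of the derivative function above, assuming a tuple-returning random_cone_prog (as imported in the next snippet) and the diffcp cone-key constants; the cone sizes are illustrative only:

cone_dict = {diffcp.ZERO: 3, diffcp.POS: 3, diffcp.SOC: [5]}
A, b, c = random_cone_prog(3 + 3 + 5, 5, cone_dict)
dA, db, dc, objective, x, y, s, _ = derivative(A, b, c, cone_dict)
# Per the docstring, an objective of 0 means (A, b, c, K) is non-pathological.
print("objective:", objective)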
import numpy as np
from scipy import sparse

import diffcp
from py_utils.random_program import random_cone_prog

cone_dict = {diffcp.ZERO: 3, diffcp.POS: 3, diffcp.SOC: [5]}

m = 3 + 3 + 5
n = 5

A, b, c = random_cone_prog(m, n, cone_dict)
x, y, s, D, DT = diffcp.solve_and_derivative(A, b, c, cone_dict)

# evaluate the derivative
nonzeros = A.nonzero()
data = 1e-4 * np.random.randn(A.size)
dA = sparse.csc_matrix((data, nonzeros), shape=A.shape)
db = 1e-4 * np.random.randn(m)
dc = 1e-4 * np.random.randn(n)
dx, dy, ds = D(dA, db, dc)

# evaluate the adjoint of the derivative
dx = c
dy = np.zeros(m)
ds = np.zeros(m)
dA, db, dc = DT(dx, dy, ds)
    import scs
    import ecos
    a = np.random.normal(size=(100, 3))
    S = np.cov(a, rowvar=False)
    A, b, c, cone_dict = write_glasso_cone_program(S, 1.)
    x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
    print(_vec2symmetric((A @ x)[1:22], 6))
    x = Matrix(sympy.symbols([
        "theta11", "theta21", "theta31", "theta22", "theta32", "theta33",
        "z11", "z22", "z33", "z21", "z31", "z32",
        "t1", "t2", "t3", "t"
    ]))

    print(Matrix(b) - Matrix(A.toarray()) @ x)
    print(Matrix(c).T @ x)
    sol = diffcp.solve_and_derivative(A, b, c, cone_dict)
    K = np.linalg.inv(S)
    sol = scs.solve(dict(A=A, b=b, c=c), cone_dict, eps=1e-15, max_iters=10000, verbose=True, acceleration_lookback=1)
    x = sol["x"]
    p = S.shape[0]
#     d = int(S.shape[0]*(S.shape[0]+1)/2)
#     theta = _vec2symmetric(x[:d], p)
#     print(theta[:3, :3])
#     print(np.linalg.inv(S)[:3, :3])
#     print(np.linalg.norm(theta - np.linalg.inv(S), "fro"))

    # theta = [1, 2, 3, 4, 5, 6]
    # z = [7, 8, 9, 10, 11, 12]
    # x = np.array([*theta, *z])
    # A = merge_psd_plus_diag(len(theta), len(z), 3)
    # merged = A @ x