Example #1
def test_soft_thresholding_od():
    """Test soft_thresholding_od function."""
    # matrix OD
    array = np.arange(9).reshape(3, 3)
    output = np.array([[0, 0, 1], [2, 4, 4], [5, 6, 8]])
    assert_array_equal(prox.soft_thresholding_od(array, 1), output)

    # tensor OD
    array = np.arange(27).reshape(3, 3, 3)
    output = np.array([
        [[0, 0, 1], [2, 4, 4], [5, 6, 8]],
        [[9, 9, 10], [11, 13, 13], [14, 15, 17]],
        [[18, 18, 19], [20, 22, 22], [23, 24, 26]],
    ])
    assert_array_equal(prox.soft_thresholding_od(array, 1), output)

    # tensor OD, lamda is a list
    array = np.arange(27).reshape(3, 3, 3)
    output = np.array([
        [[0, 0, 1], [2, 4, 4], [5, 6, 8]],
        [[9, 8, 9], [10, 13, 12], [13, 14, 17]],
        [[18, 16, 17], [18, 22, 20], [21, 22, 26]],
    ])

    assert_array_equal(
        prox.soft_thresholding_od(array, np.arange(1, 4)), output)
Example #2
def choose_gamma(
    gamma,
    x,
    beta,
    alpha,
    lamda,
    grad,
    function_f=None,
    delta=1e-4,
    eps=0.5,
    max_iter=1000,
    p=1,
    x_inv=None,
    choose="gamma",
    laplacian_penalty=False,
):
    """Choose gamma for backtracking.

    References
    ----------
    Salzo S. (2017). https://doi.org/10.1137/16M1073741

    """
    fx = function_f(K=x)
    for i in range(max_iter):
        if laplacian_penalty:
            prox = soft_thresholding_od(x - gamma * grad, alpha * gamma)
        else:
            prox = prox_FL(x - gamma * grad,
                           beta * gamma,
                           alpha * gamma,
                           p=p,
                           symmetric=True)
        if positive_definite(prox) and choose != "gamma":
            break

        if choose == "gamma":
            y_minus_x = prox - x
            loss_diff = function_f(K=x + lamda * y_minus_x) - fx

            tolerance = _scalar_product(y_minus_x, grad)
            tolerance += delta / gamma * _scalar_product(y_minus_x, y_minus_x)
            if loss_diff <= lamda * tolerance:
                break
        gamma *= eps

    return gamma, prox
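The loop above shrinks gamma by a factor eps until the sufficient-decrease condition of Salzo (2017) holds. The following self-contained sketch illustrates the same backtracking rule on a toy least-squares problem with plain soft-thresholding; all names below are local to the sketch and are not part of the regain API.

import numpy as np


def soft_threshold(x, lamda):
    # elementwise soft-thresholding, the prox of the l1 norm
    return np.sign(x) * np.maximum(np.abs(x) - lamda, 0.0)


def backtrack_gamma(x, grad, f, alpha, gamma=1.0, delta=1e-4, eps=0.5,
                    max_iter=50):
    # shrink gamma until the proximal step gives sufficient decrease,
    # mirroring the `choose == "gamma"` branch of choose_gamma above
    fx = f(x)
    for _ in range(max_iter):
        y = soft_threshold(x - gamma * grad, alpha * gamma)
        diff = y - x
        bound = grad.dot(diff) + delta / gamma * diff.dot(diff)
        if f(y) - fx <= bound:
            break
        gamma *= eps
    return gamma, y


rng = np.random.RandomState(0)
A, b = rng.randn(20, 5), rng.randn(20)
f = lambda w: 0.5 * np.sum((A.dot(w) - b) ** 2)
w0 = np.zeros(5)
gamma, w1 = backtrack_gamma(w0, A.T.dot(A.dot(w0) - b), f, alpha=0.1)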
Example #3
def tgl_forward_backward(
    emp_cov, alpha=0.01, beta=1., max_iter=100, n_samples=None, verbose=False,
    tol=1e-4, delta=1e-4, gamma=1., lamda=1., eps=0.5, debug=False,
    return_history=False, return_n_iter=True, choose='gamma',
    lamda_criterion='b', time_norm=1, compute_objective=True,
    return_n_linesearch=False, vareps=1e-5, stop_at=None, stop_when=1e-4,
        laplacian_penalty=False, init='empirical'):
    """Time-varying graphical lasso solver with forward-backward splitting.

    Solves the following problem via FBS:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha*||K_i||_{od,1}
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of the data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data.
    alpha, beta : float, optional
        Regularisation parameters.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    verbose : bool, default False
        Print info at each iteration.
    tol : float, optional
        Absolute tolerance for convergence.
    delta, gamma, lamda, eps : float, optional
        FBS parameters.
    debug : bool, default False
        Run in debug mode.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    choose : ('gamma', 'lamda', 'fixed', 'both')
        Search iteratively for gamma / lamda / neither (fixed) / both.
    lamda_criterion : ('a', 'b', 'c')
        Criterion to choose lamda. See ref for details.
    time_norm : float, optional
        Choose the temporal norm between points.
    compute_objective : bool, default True
        Choose to compute the objective value.
    return_n_linesearch : bool, optional
        Return the number of line-search iterations before convergence.
    vareps : float, optional
        Jitter for the loss.
    stop_at, stop_when : float, optional
        Other convergence criteria, as used in the paper.
    laplacian_penalty : bool, default False
        Use Laplacian penalty.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K, covariance : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    available_choose = ('gamma', 'lamda', 'fixed', 'both')
    if choose not in available_choose:
        raise ValueError(
            "`choose` parameter must be one of %s." % available_choose)

    n_times, _, n_features = emp_cov.shape
    K = init_precision(emp_cov, mode=init)

    if laplacian_penalty:
        obj_partial = partial(
            objective_laplacian, n_samples=n_samples, emp_cov=emp_cov,
            alpha=alpha, beta=beta, vareps=vareps)
        function_f = partial(
            loss_laplacian, beta=beta, n_samples=n_samples, S=emp_cov,
            vareps=vareps)
        gradient_f = partial(
            grad_loss_laplacian, emp_cov=emp_cov, beta=beta,
            n_samples=n_samples, vareps=vareps)
        function_g = partial(penalty_laplacian, alpha=alpha)
    else:
        psi = partial(vector_p_norm, p=time_norm)
        obj_partial = partial(
            objective, n_samples=n_samples, emp_cov=emp_cov, alpha=alpha,
            beta=beta, psi=psi, vareps=vareps)
        function_f = partial(
            loss, n_samples=n_samples, S=emp_cov, vareps=vareps)
        gradient_f = partial(
            grad_loss, emp_cov=emp_cov, n_samples=n_samples, vareps=vareps)
        function_g = partial(penalty, alpha=alpha, beta=beta, psi=psi)

    max_residual = -np.inf
    n_linesearch = 0
    checks = [convergence(obj=obj_partial(precision=K))]
    for iteration_ in range(max_iter):
        k_previous = K.copy()
        x_inv = np.array([linalg.pinvh(x) for x in K])
        grad = gradient_f(K, x_inv=x_inv)

        if choose in ['gamma', 'both']:
            gamma, y = choose_gamma(
                gamma / eps if iteration_ > 0 else gamma, K,
                function_f=function_f, beta=beta, alpha=alpha, lamda=lamda,
                grad=grad, delta=delta, eps=eps, max_iter=200, p=time_norm,
                x_inv=x_inv, choose=choose,
                laplacian_penalty=laplacian_penalty)

        x_hat = K - gamma * grad
        if choose not in ['gamma', 'both']:
            if laplacian_penalty:
                y = soft_thresholding_od(x_hat, alpha * gamma)
            else:
                y = prox_FL(
                    x_hat, beta * gamma, alpha * gamma, p=time_norm,
                    symmetric=True)

        if choose in ('lamda', 'both'):
            lamda, n_ls = choose_lamda(
                min(lamda / eps if iteration_ > 0 else lamda,
                    1), K, function_f=function_f, objective_f=obj_partial,
                gradient_f=gradient_f, function_g=function_g, gamma=gamma,
                delta=delta, eps=eps, criterion=lamda_criterion, max_iter=200,
                p=time_norm, grad=grad, prox=y, vareps=vareps)
            n_linesearch += n_ls

        K = K + min(max(lamda, 0), 1) * (y - K)
        # K, t = fista_step(Y, Y - Y_old, t)

        check = convergence(
            obj=obj_partial(precision=K),
            rnorm=np.linalg.norm(upper_diag_3d(K) - upper_diag_3d(k_previous)),
            snorm=np.linalg.norm(
                obj_partial(precision=K) - obj_partial(precision=k_previous)),
            e_pri=np.sqrt(upper_diag_3d(K).size) * tol + tol * max(
                np.linalg.norm(upper_diag_3d(K)),
                np.linalg.norm(upper_diag_3d(k_previous))), e_dual=tol)

        if verbose and iteration_ % (50 if verbose < 2 else 1) == 0:
            print(
                "obj: %.4f, rnorm: %.7f, snorm: %.4f,"
                "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        if return_history:
            checks.append(check)

        if np.isnan(check.rnorm) or np.isnan(check.snorm):
            warnings.warn("precision is not positive definite.")

        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break
        else:
            # use this convergence criterion
            subgrad = (x_hat - K) / gamma
            if 0:
                if laplacian_penalty:
                    grad = grad_loss_laplacian(
                        K, emp_cov, n_samples, vareps=vareps)
                else:
                    grad = grad_loss(K, emp_cov, n_samples, vareps=vareps)
                res_norm = np.linalg.norm(grad + subgrad)

                if iteration_ == 0:
                    normalizer = res_norm + 1e-6
                max_residual = max(
                    np.linalg.norm(grad), np.linalg.norm(subgrad)) + 1e-6
            else:
                res_norm = np.linalg.norm(K - k_previous) / gamma
                max_residual = max(max_residual, res_norm)
                normalizer = max(
                    np.linalg.norm(grad), np.linalg.norm(subgrad)) + 1e-6

            r_rel = res_norm / max_residual
            r_norm = res_norm / normalizer

            if not debug and (r_rel <= tol
                              or r_norm <= tol) and iteration_ > 0:  # or (
                # check.rnorm <= check.e_pri and iteration_ > 0):
                break
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(k) for k in K])
    return_list = [K, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    if return_n_linesearch:
        return_list.append(n_linesearch)
    return return_list
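A minimal usage sketch for the forward-backward solver above, assuming tgl_forward_backward and its regain dependencies are in scope; the data is synthetic and the hyperparameters are purely illustrative.

import numpy as np

T, n, d = 4, 50, 10
rng = np.random.RandomState(42)
X = rng.randn(T, n, d)
# per-time empirical covariances, shape (T, d, d)
emp_cov = np.array([x.T.dot(x) / n for x in X])

K, covariance, n_iter = tgl_forward_backward(
    emp_cov, alpha=0.1, beta=1., n_samples=np.full(T, n), max_iter=200)
print(K.shape)  # (T, d, d), one precision matrix per time point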
Example #4
def _fit(X,
         alpha=1e-2,
         gamma=1e-3,
         tol=1e-3,
         max_iter=1000,
         verbose=0,
         return_history=True,
         compute_objective=True,
         warm_start=None,
         return_n_iter=False,
         adjust_gamma=False,
         A=None,
         T=0,
         rho=1,
         update_gamma=0.5,
         line_search=False):
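    """Proximal-gradient solver for an off-diagonal l1-penalised Ising model.

    Alternates a gradient step on the Ising loss with off-diagonal
    soft-thresholding; if ``line_search`` is True, the step size ``gamma``
    is shrunk by ``update_gamma`` until a sufficient-decrease condition holds.
    """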
    n, d = X.shape
    if warm_start is None:
        theta = np.zeros((d, d))
    else:
        theta = check_array(warm_start)

    thetas = [theta]
    theta_new = theta.copy()
    checks = []
    for iter_ in range(max_iter):
        theta_old = thetas[-1]
        if not line_search:
            grad = _gradient_ising(X, theta, n, A, rho, T)
            theta_new = theta - gamma * grad
            theta = (theta_new + theta_new.T) / 2
            theta = soft_thresholding_od(theta, alpha * gamma)
        else:
            while True:
                grad = _gradient_ising(X, theta, n, A, rho, T)
                theta_new = theta - gamma * grad
                theta = (theta_new + theta_new.T) / 2
                theta = soft_thresholding_od(theta, alpha * gamma)
                print(theta)
                loss_new = loss(X, theta)
                loss_old = loss(X, theta_old)
                # Line search
                diff_theta2 = np.linalg.norm(theta_old - theta)**2
                grad_diff = np.trace(grad.dot(theta_old - theta))
                diff = loss_old - grad_diff + (diff_theta2 / (2 * gamma))

                if loss_new > diff or np.isinf(loss_new) or np.isnan(loss_new):
                    gamma = update_gamma * gamma
                    theta = theta_old - gamma * grad
                    theta = soft_thresholding_od(theta, alpha * gamma)
                    loss_new = loss(X, theta)
                    diff = loss_old - grad_diff + (diff_theta2 / (2 * gamma))
                else:
                    break
        thetas.append(theta)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            check = convergence(
                iter=iter_,
                obj=objective(X, theta, alpha),
                iter_norm=np.linalg.norm(thetas[-2] - thetas[-1]),
                iter_r_norm=(np.linalg.norm(thetas[-2] - thetas[-1]) /
                             np.linalg.norm(thetas[-1])))
        checks.append(check)
        # if adjust_gamma: # TODO multiply or divide
        if verbose:
            print('Iter: %d, objective: %.4f, iter_norm %.4f' %
                  (check[0], check[1], check[2]))

        if np.abs(check[2]) < tol:
            break

    return_list = [thetas[-1]]
    if return_history:
        return_list.append(thetas)
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iter_)

    return return_list
Example #5
def inequality_time_graphical_lasso(S, K_init, max_iter, loss, C, theta,
                                    c_prox, rho, div, psi, gamma, tol, rtol,
                                    verbose, return_history, return_n_iter,
                                    mode, compute_objective, stop_at,
                                    stop_when, update_rho_options, init):
    """Inequality constrained time-varying graphical LASSO solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T ||K_i||_{od,1} + beta sum_{i=2}^T Psi(K_i - K_{i-1})
        s.t. objective <= c_i for i = 1, ..., T

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    S : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha, beta : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    gamma : float, optional
        Kernel parameter when psi is chosen to be 'kernel'.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    psi_name = psi.__name__

    if loss == 'LL':
        loss_function = neg_logl
    else:
        loss_function = dtrace

    Z_0 = K_init  # init_precision(S, mode=init)
    Z_1 = Z_0.copy()[:-1]
    Z_2 = Z_0.copy()[1:]

    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    # divisor for consensus variables, accounting for one less matrix for t = 0 and t = T
    divisor = np.full(S.shape[0], 2, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    out_obj = []

    checks = [convergence(obj=penalty_objective(Z_0, Z_1, Z_2, psi, theta))]

    for iteration_ in range(max_iter):
        A_K_pen = np.zeros_like(Z_0)
        A_K_pen[:-1] += Z_1 - U_1
        A_K_pen[1:] += Z_2 - U_2
        A_K_pen += A_K_pen.transpose(0, 2, 1)
        A_K_pen /= 2.

        Z_0 = soft_thresholding_od(A_K_pen / divisor[:, None, None],
                                   lamda=theta / (rho * divisor))

        # check feasibility and perform line search if necessary
        losses_all = loss_gen(loss_function, S, Z_0)
        feasibility_check = losses_all > C
        infeasible_indices = list(
            compress(range(len(feasibility_check)), feasibility_check))

        for i in infeasible_indices:
            if c_prox == 'cvx':
                Z_0[i], loss_i = prox_cvx(loss_function, S[i], Z_0[i],
                                          Z_0_old[i], C[i], div)
            elif c_prox == 'grad':
                Z_0[i], loss_i = prox_grad(loss_function, S[i], Z_0[i],
                                           Z_0_old[i], C[i], 0.)

        # break if losses post-correction blow up
        losses_all_new = loss_gen(loss_function, S, Z_0)
        if np.inf in losses_all_new:
            print(iteration_, 'Inf')
            covariance_ = np.array([linalg.pinvh(x) for x in Z_0_old])
            return_list = [Z_0_old, covariance_]
            if return_history:
                return_list.append(checks)
            if return_n_iter:
                return_list.append(iteration_)
            return return_list

        # other Zs
        A_1 = Z_0[:-1] + U_1
        A_2 = Z_0[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * (1 - theta) / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * (1 - theta) / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        U_1 += Z_0[:-1] - Z_1
        U_2 += Z_0[1:] - Z_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(Z_0[:-1] - Z_1) + squared_norm(Z_0[1:] - Z_2))
        snorm = rho * np.sqrt(
            squared_norm(Z_1 - Z_1_old) + squared_norm(Z_2 - Z_2_old))

        obj = penalty_objective(Z_0, Z_1, Z_2, psi, theta)

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(losses_all_new.size + 2 * Z_1.size) * tol + rtol *
            (max(np.sqrt(squared_norm(losses_all_new)), np.sqrt(
                squared_norm(C))) +
             max(np.sqrt(squared_norm(Z_1)), np.sqrt(squared_norm(Z_0[:-1]))) +
             max(np.sqrt(squared_norm(Z_2)), np.sqrt(squared_norm(Z_0[1:])))),
            e_dual=np.sqrt(2 * Z_1.size) * tol +
            rtol * rho * np.sqrt(squared_norm(U_1) + squared_norm(U_2)))

        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        out_obj.append(penalty_objective(Z_0, Z_0[:-1], Z_0[1:], psi, theta))
        checks.append(check)

        # if len(out_obj) > 100 and c_prox == 'grad':
        #     if (np.mean(out_obj[-11:-1]) - np.mean(out_obj[-10:])) < stop_when:
        #         print('obj break')
        #         break

        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        # rho_new = update_rho(
        #     rho, rnorm, snorm, iteration=iteration_,
        #     mu=1e2, tau_inc=1.01, tau_dec=1.01)
        #     # **(update_rho_options or {}))
        # # scaled dual variables should be also rescaled
        # U_1 *= rho / rho_new
        # U_2 *= rho / rho_new
        # rho = rho_new

    else:
        warnings.warn("Objective did not converge.")

    print(iteration_, out_obj[-1])
    # print(out_obj)
    print(check.rnorm, check.e_pri)
    print(check.snorm, check.e_dual)

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
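A usage sketch for the inequality-constrained solver above. All arguments are required, so every value below (data, per-time bounds C, theta, rho, div) is an illustrative assumption, and the helpers it relies on (loss_gen, prox_grad, check_norm_prox, ...) are assumed to be in scope from the surrounding module.

import numpy as np

T, n, d = 4, 50, 10
rng = np.random.RandomState(0)
X = rng.randn(T, n, d)
S = np.array([x.T.dot(x) / n for x in X])          # (T, d, d) empirical covariances
K_init = np.array([np.eye(d) for _ in range(T)])   # identity warm start
C = np.full(T, 2. * d)                             # illustrative per-time loss bounds

Z, covariance, n_iter = inequality_time_graphical_lasso(
    S, K_init, max_iter=200, loss='LL', C=C, theta=0.5, c_prox='grad',
    rho=1., div=2., psi='laplacian', gamma=1., tol=1e-4, rtol=1e-4,
    verbose=False, return_history=False, return_n_iter=True, mode='admm',
    compute_objective=True, stop_at=None, stop_when=1e-4,
    update_rho_options=None, init='empirical')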
def _Z_0(x1, x2, Z_0, loss_res, nabla_con, nabla_pen):
    A = Z_0 - x2 * (1 - theta) * nabla_pen
    # A = Z_0 - x1 * nabla_con - x2 * (1 - theta) * nabla_pen
    A -= x1 * loss_res[:, None, None] * nabla_con
    return soft_thresholding_od(A, lamda=x2 * theta), A
def taylor_time_graphical_lasso(
        S, K_init, max_iter, loss, C, theta, rho, mult,
        weights, m, eps, psi, gamma, tol, rtol, verbose,
        return_history, return_n_iter, mode, compute_objective,
        stop_at, stop_when, update_rho_options):
    """Equality constrained time-varying graphical LASSO solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T ||K_i||_{od,1} + beta sum_{i=2}^T Psi(K_i - K_{i-1})
        s.t. objective = c_i for i = 1, ..., T

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    S : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha, beta : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    gamma : float, optional
        Kernel parameter when psi is chosen to be 'kernel'.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    if loss == 'LL':
        loss_func = neg_logl
    else:
        loss_func = dtrace

    T = S.shape[0]
    S_flat = S.copy().reshape(T, S.shape[1] * S.shape[2])
    I_flat = np.eye(S.shape[1]).ravel()  # flattened identity, used by the dtrace gradient

    K = K_init.copy()
    Z_0 = K_init.copy()
    Z_1 = Z_0.copy()[:-1] 
    Z_2 = Z_0.copy()[1:]  

    u = np.zeros(T)
    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = Z_0.copy()
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    # divisor for consensus variables, accounting for one less matrix for t = 0 and t = T
    divisor = np.full(T, 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    rho = rho * np.ones(T)    
    if weights[0] is not None:
        if weights[0] == 'rbf':
            weights = rbf_weights(T, weights[1], mult)
        elif weights[0] == 'exp':
            weights = exp_weights(T, weights[1], mult)
        elif weights[0] == 'lin':
            weights = lin_weights(T, weights[1], mult)
        con_obj = {}
        for t in range(T):
            con_obj[t] = []

    con_obj_mean = []
    con_obj_max = []

    # loss residuals
    loss_res = np.zeros(T)
    loss_init = loss_gen(loss_func, S, Z_0_old)
    loss_res_old = loss_init - C

    # loss_diff = C - loss_init
    # C_  = C - loss_diff

    out_obj = []

    checks = [
        convergence(
            obj=penalty_objective(Z_0, Z_1, Z_2, psi, theta))
    ]


    def _K(x, A_t, g_t, nabla_t, nabla_t_T_A_t, nabla_t_T_nabla_t, rho_t, divisor_t):
        _K_t = (A_t + x * g_t * nabla_t - 
                    (x * nabla_t_T_A_t + x ** 2 * g_t * nabla_t_T_nabla_t) * nabla_t  / 
                    (divisor_t * rho_t + x * nabla_t_T_nabla_t)
                ).reshape(S.shape[1], S.shape[2])
        _K_t /= (rho_t * divisor_t)
        return 0.5 * (_K_t + _K_t.transpose(1, 0))


    # def _K(x, A_t, nabla_t):
    #     _A_t = A_t - x * nabla_t
    #     return _A_t


    # constrained optimisation via line search
    def _f(x, _K, A_t, g_t, nabla_t, nabla_t_T_A_t, nabla_t_T_nabla_t, rho_t, divisor_t, 
            loss_func, S_t, c_t, loss_res_old_t, nabla_t_T_K_old_t):
        _K_t = _K(x, A_t, g_t, nabla_t, nabla_t_T_A_t, nabla_t_T_nabla_t, rho_t, divisor_t)
        loss_res_t = loss_func(S_t, _K_t) - c_t
        return loss_res_t ** 2 + (loss_res_t - loss_res_old_t - nabla_t @ _K_t.ravel() + nabla_t_T_K_old_t) ** 2


    # # constrained optimisation via line search
    # def _f(x, _K, A_t, nabla_t, loss_func, S_t, c_t, loss_res_old_t):
    #     _K_t = _K(x, A_t, nabla_t)
    #     loss_res_t = loss_func(S_t, _K_t) - c_t
    #     return loss_res_t ** 2 + (loss_res_t - loss_res_old_t - np.sum(nabla_t * (_K_t - A_t))) ** 2


    for iteration_ in range(max_iter):
        # update K
        A = rho[:, None, None] * (Z_0 - U_0)
        A[:-1] += rho[:-1, None, None] * (Z_1 - U_1)
        A[1:] += rho[1:, None, None] * (Z_2 - U_2)
        # A += A.transpose(0, 2, 1)
        # A /= 2. 
        # A /= (rho * divisor)[:, None, None]

        # loss_res_pre = loss_gen(loss_func, S, A) - C

        if loss_func.__name__ == 'neg_logl':
            nabla = np.array([S_t - np.linalg.inv(K_t).ravel() for (S_t, K_t) in zip(S_flat, K)])
            # nabla = np.array([S_t - np.linalg.inv(K_t) for (S_t, K_t) in zip(S, A)])
        elif loss_func.__name__ == 'dtrace': 
            nabla = np.array([(2 * K_t.ravel() @ S_t - I_flat) for (S_t, K_t) in zip(S_flat, K)])
            # nabla = np.array([(2 * K_t @ S_t - I) for (S_t, K_t) in zip(S, K)])
        nabla_T_K_old = np.array([nabla_t @ K_t.ravel() for (nabla_t, K_t) in zip(nabla, K)])
        # nabla_T_K_old = np.array([np.sum(nabla_t * K_t) for (nabla_t, K_t) in zip(nabla, K)])
        g = nabla_T_K_old - loss_res_old
        nabla_T_A = np.array([nabla_t @ A_t.ravel() for (nabla_t, A_t) in zip(nabla, A)])
        nabla_T_nabla =  np.einsum('ij,ij->i', nabla, nabla)
        
        if iteration_ == 0:
            nabla = np.zeros_like(S_flat)
            # nabla = np.zeros_like(S)
            nabla_T_K_old = np.zeros(T)
            g = np.zeros(T)
            nabla_T_A = np.zeros(T)
            nabla_T_nabla = np.zeros(T)

        col = []

        for t in range(T):
            out = minimize_scalar(
                    partial(_f, _K=_K, A_t=A[t].ravel(), g_t=g[t], nabla_t=nabla[t], 
                            nabla_t_T_A_t=nabla_T_A[t], nabla_t_T_nabla_t=nabla_T_nabla[t], 
                            rho_t=rho[t], divisor_t=divisor[t], loss_func=loss_func, 
                            S_t=S[t], c_t=C[t], loss_res_old_t=loss_res_old[t], 
                            nabla_t_T_K_old_t=nabla_T_K_old[t])
                    )
            # out = minimize_scalar(
            #         partial(_f, _K=_K, A_t=A[t], nabla_t=nabla[t], loss_func=loss_func, 
            #                 S_t=S[t], c_t=C[t], loss_res_old_t=loss_res_pre[t])
            #         )
            K[t] = _K(out.x, A[t].ravel(), g[t], nabla[t], nabla_T_A[t], nabla_T_nabla[t], rho[t], divisor[t])
            # K[t] = _K(out.x, A[t], nabla[t])
            loss_res[t] = loss_func(S[t], K[t]) - C[t]
            # u[t] += loss_res[t]    
            if weights[0] is not None:
                con_obj[t].append(loss_res[t] ** 2)    
                if len(con_obj[t]) > m and np.mean(con_obj[t][-m:-int(m/2)]) < np.mean(con_obj[t][-int(m/2):]) and loss_res[t] > eps:
                    col.append(t)
                
        # update Z_0
        _Z_0 = K + U_0
        _Z_0 += _Z_0.transpose(0, 2, 1)
        _Z_0 /= 2.
        Z_0 = soft_thresholding_od(_Z_0, lamda=theta / rho[:, None, None])
                
        # update Z_1, Z_2
        A_1 = Z_0[:-1] + U_1
        A_2 = Z_0[1:] + U_2
        if not psi_node_penalty:
            A_add = A_2 + A_1
            A_sub = A_2 - A_1
            prox_e_1 = prox_psi(A_sub, lamda=2. * (1 - theta) / rho[:-1, None, None])
            prox_e_2 = prox_psi(A_sub, lamda=2. * (1 - theta) / rho[1:, None, None])
            Z_1 = .5 * (A_add - prox_e_1)
            Z_2 = .5 * (A_add + prox_e_2)
        # TODO: Fix for rho vector
        # else:
        #     if weights is not None:
        #         Z_1, Z_2 = prox_psi(
        #             np.concatenate((A_1, A_2), axis=1), lamda=.5 * (1 - theta) / rho[t],
        #             rho=rho[t], tol=tol, rtol=rtol, max_iter=max_iter)

        # update residuals
        con_obj_mean.append(np.mean(loss_res) ** 2)
        con_obj_max.append(np.max(loss_res))

        U_0 += K - Z_0 
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + 
            squared_norm(K[:-1] - Z_1) +
             squared_norm(K[1:] - Z_2)
            )

        loss_res_old = loss_res.copy()
        
        snorm = np.sqrt(
                    squared_norm(rho[:, None, None] * (Z_0 - Z_0_old)) + 
                    squared_norm(rho[:-1, None, None] * (Z_1 - Z_1_old)) + 
                    squared_norm(rho[1:, None, None] * (Z_2 - Z_2_old))
                )

        e_dual = np.sqrt(Z_0.size + 2 * Z_1.size) * tol + rtol * np.sqrt(
                    squared_norm(rho[:, None, None] * U_0) + 
                    squared_norm(rho[:-1, None, None] * U_1) + 
                    squared_norm(rho[1:, None, None] * U_2)
                 )

        obj = objective(loss_res, Z_0, Z_1, Z_2, psi, theta)

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(loss_res.size + Z_0.size + 2 * Z_1.size) * tol + rtol * 
                (
                max(np.sqrt(squared_norm(Z_0)), np.sqrt(squared_norm(K))) +
                max(np.sqrt(squared_norm(Z_1)), np.sqrt(squared_norm(K[:-1]))) + 
                max(np.sqrt(squared_norm(Z_2)), np.sqrt(squared_norm(K[1:])))
                ),
            e_dual=e_dual
        )

        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print(
                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        out_obj.append(penalty_objective(Z_0, Z_0[:-1], Z_0[1:], psi, theta))
        if not iteration_ % 100:
            print(iteration_)
            print(np.max(con_obj_max[-1]), np.mean(loss_res))
            print(out_obj[-1])
        checks.append(check)

        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        if weights[0] is None:
            if len(con_obj_mean) > m:
                if np.mean(con_obj_mean[-m:-int(m/2)]) < np.mean(con_obj_mean[-int(m/2):]) and np.max(loss_res) > eps:
                # or np.mean(con_obj_max[-100:-50]) < np.mean(con_obj_max[-50:])) # np.mean(loss_res) > 0.25:
                    print("Rho Mult", mult * rho[0], iteration_, np.mean(loss_res), con_obj_max[-1])
                    # loss_diff /= 5            
                    # C_ = C - loss_diff           
                    # resscale scaled dual variables
                    rho = mult * rho
                    # u /= mult
                    U_0 /= mult
                    U_1 /= mult
                    U_2 /= mult
                    con_obj_mean = []
                    con_obj_max = []
        else:
            for t in col:
                rho *= weights[t]
                # u /= weights[t]
                U_0 /= weights[t][:, None, None]
                U_1 /= weights[t][:-1, None, None]
                U_2 /= weights[t][1:, None, None]
                con_obj[t] = []
                print('Mult', iteration_, t, rho[t])    
    else:
        warnings.warn("Objective did not converge.")

    print(iteration_, out_obj[-1])
    # print(out_obj)
    print(check.rnorm, check.e_pri)
    print(check.snorm, check.e_dual)

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
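When the residual-based heuristic above multiplies rho by mult, the scaled dual variables U_0, U_1 and U_2 are divided by the same factor. This keeps the unscaled duals rho * U unchanged, as a small self-contained check illustrates (names below are local to the sketch):

import numpy as np

rng = np.random.RandomState(0)
rho, mult = 1., 5.
U = rng.randn(3, 3)
Y = rho * U                  # unscaled dual before the rho update

rho_new = mult * rho
U_new = U / mult             # same rescaling as U_0 /= mult above
np.testing.assert_allclose(rho_new * U_new, Y)   # unscaled dual preserved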
def time_graphical_lasso(emp_cov,
                         alpha=0.01,
                         rho=1,
                         beta=1,
                         theta=0.5,
                         max_iter=100,
                         n_samples=None,
                         verbose=False,
                         psi='laplacian',
                         tol=1e-4,
                         rtol=1e-4,
                         return_history=False,
                         return_n_iter=True,
                         mode='admm',
                         compute_objective=True,
                         stop_at=None,
                         stop_when=1e-4,
                         update_rho_options=None,
                         init='empirical'):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha*||K_i||_{od,1}
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of the data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha, beta : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]  # np.zeros_like(emp_cov)[:-1]
    Z_2 = Z_0.copy()[1:]  # np.zeros_like(emp_cov)[1:]

    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    # divisor for consensus variables, accounting for two less matrices
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = [
        convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_1, Z_2,
                                  alpha, beta, psi))
    ]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        A[:-1] += Z_1 - U_1
        A[1:] += Z_2 - U_2
        A += A.transpose(0, 2, 1)
        A /= 2.

        A *= -rho / n_samples[:, None, None]
        A += emp_cov

        K = np.array([
            prox_logdet_alt(a, lamda=rho * div) for a, div in zip(A, divisor)
        ])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.
        Z_0 = soft_thresholding_od(A, lamda=alpha / rho)

        # other Zs
        A_1 = K[:-1] + U_1
        A_2 = K[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * beta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        U_0 += K - Z_0
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + squared_norm(K[:-1] - Z_1) +
            squared_norm(K[1:] - Z_2))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old))

        obj = objective(
            n_samples, emp_cov, Z_0, K, Z_1, Z_2, alpha, beta, psi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(K.size + 2 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(Z_0) + squared_norm(Z_1) + squared_norm(Z_2)),
                np.sqrt(
                    squared_norm(K) + squared_norm(K[:-1]) +
                    squared_norm(K[1:]))),
            e_dual=np.sqrt(K.size + 2 * Z_1.size) * tol + rtol * rho *
            np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
            # precision=Z_0.copy()
        )
        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new

        #assert is_pos_def(Z_0)
    else:
        warnings.warn("Objective did not converge.")

    print(iteration_, penalty_objective(Z_0, Z_0[:-1], Z_0[1:], psi, theta))

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
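A minimal usage sketch for the ADMM solver above, assuming time_graphical_lasso and its regain helpers are in scope; the data and hyperparameters are illustrative.

import numpy as np

T, n, d = 5, 100, 8
rng = np.random.RandomState(1)
X = rng.randn(T, n, d)
emp_cov = np.array([x.T.dot(x) / n for x in X])   # (T, d, d)

Z, covariance, n_iter = time_graphical_lasso(
    emp_cov, alpha=0.1, beta=1., psi='laplacian',
    n_samples=np.full(T, n), max_iter=200)
print(Z.shape)  # (T, d, d) estimated precision matrices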
Example #9
def graphical_lasso(
    emp_cov,
    alpha=0.01,
    rho=1,
    over_relax=1,
    max_iter=100,
    verbose=False,
    tol=1e-4,
    rtol=1e-4,
    return_history=False,
    return_n_iter=True,
    update_rho_options=None,
    compute_objective=True,
    init="empirical",
):
    r"""Graphical lasso solver via ADMM.

    Solves the following problem:
        minimize  trace(S*K) - log det K + alpha ||K||_{od,1}

    where S = (1/n) X^T \times X is the empirical covariance of the data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : array-like
        Empirical covariance matrix.
    alpha : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    over_relax : float, optional
        Over-relaxation parameter (typically between 1.0 and 1.8).
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. The default is to
        invert the empirical covariance.

    Returns
    -------
    precision_ : numpy.array, 2-dimensional
        Solution to the problem.
    covariance_ : np.array, 2 dimensional
        Empirical covariance matrix.
    n_iter_ : int
        If return_n_iter, returns the number of iterations before convergence.
    history_ : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    Z = init_precision(emp_cov, mode=init)
    U = np.zeros_like(emp_cov)
    Z_old = np.zeros_like(Z)

    checks = []
    for iteration_ in range(max_iter):
        # x-update
        A = Z - U
        A += A.T
        A /= 2.0
        K = prox_logdet(emp_cov - rho * A, lamda=1.0 / rho)

        # z-update with relaxation
        K_hat = over_relax * K - (1 - over_relax) * Z
        Z = soft_thresholding_od(K_hat + U, lamda=alpha / rho)

        # update residuals
        U += K_hat - Z

        # diagnostics, reporting, termination checks
        obj = objective(emp_cov, K, Z, alpha) if compute_objective else np.nan
        rnorm = np.linalg.norm(K - Z, "fro")
        snorm = rho * np.linalg.norm(Z - Z_old, "fro")
        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(K.size) * tol +
            rtol * max(np.linalg.norm(K, "fro"), np.linalg.norm(Z, "fro")),
            e_dual=np.sqrt(K.size) * tol + rtol * rho * np.linalg.norm(U),
        )

        Z_old = Z.copy()
        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [Z, emp_cov]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
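A usage sketch for the single-time-point ADMM graphical lasso above, assuming graphical_lasso and its regain helpers (prox_logdet, soft_thresholding_od, update_rho, ...) are in scope; the data is synthetic.

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(200, 10)
emp_cov = np.cov(X, rowvar=False)

precision, covariance, n_iter = graphical_lasso(emp_cov, alpha=0.1, max_iter=200)
print(precision.shape)  # (10, 10) sparse precision estimate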
def equality_time_graphical_lasso(
        S,
        K_init,
        max_iter,
        loss,
        C,
        rho,  # n_samples=None, 
        psi,
        gamma,
        tol,
        rtol,
        verbose,
        return_history,
        return_n_iter,
        mode,
        compute_objective,
        stop_at,
        stop_when,
        update_rho_options,
        init):
    """Equality constrained time-varying graphical LASSO solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T ||K_i||_{od,1} + beta sum_{i=2}^T Psi(K_i - K_{i-1})
        s.t. objective = c_i for i = 1, ..., T

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    S : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    gamma : float, optional
        Kernel parameter when psi is chosen to be 'kernel'.
    C : float or ndarray, shape (n_times,)
        Log-likelihood constraints for each K_i.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    psi_name = psi.__name__

    if loss == 'LL':
        loss_function = neg_logl
    else:
        loss_function = dtrace

    K = K_init
    Z_0 = K.copy()
    Z_1 = K.copy()[:-1]
    Z_2 = K.copy()[1:]

    u = np.zeros((S.shape[0]))
    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    I = np.eye(S.shape[1])

    checks = [
        convergence(
            obj=equality_objective(loss_function, S, K, C, Z_0, Z_1, Z_2, psi))
    ]

    for iteration_ in range(max_iter):
        # update K
        A_K = U_0 - Z_0
        A_K[:-1] += Z_1 - U_1
        A_K[1:] += Z_2 - U_2
        A_K += A_K.transpose(0, 2, 1)
        A_K /= 2.

        K = soft_thresholding_od(A_K, lamda=1. / rho)

        # update Z_0
        residual_loss_constraint_u = loss_gen(loss_function, S, Z_0) - C + u

        A_Z = K + U_0
        A_Z += A_Z.transpose(0, 2, 1)
        A_Z /= 2.

        if loss_function == neg_logl:
            A_Z -= residual_loss_constraint_u[:, None, None] * S
            Z_0 = np.array([
                prox_logdet_constrained(_A, _a, I)
                for _A, _a in zip(A_Z, residual_loss_constraint_u)
            ])
        elif loss_function == dtrace:
            Z_0 = np.array([
                prox_dtrace_constrained(_A, _S, _a, I)
                for _A, _S, _a in zip(A_Z, S, residual_loss_constraint_u)
            ])

        # other Zs
        A_1 = K[:-1] + U_1
        A_2 = K[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        residual_loss_constraint = loss_gen(loss_function, S, Z_0) - C
        u += residual_loss_constraint
        U_0 += K - Z_0
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        print(residual_loss_constraint)

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            np.sum(residual_loss_constraint**2) + squared_norm(K - Z_0) +
            squared_norm(K[:-1] - Z_1) + squared_norm(K[1:] - Z_2))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old))

        obj = equality_objective(loss_function, S, K, C, Z_0, Z_1, Z_2,
                                 psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(Z_0.size + 2 * Z_1.size + S.shape[0]) * tol +
            rtol * max(
                np.sqrt(
                    np.sum(C**2) + squared_norm(Z_0) + squared_norm(Z_1) +
                    squared_norm(Z_2)),
                np.sqrt(
                    np.sum(
                        (residual_loss_constraint + C)**2) + squared_norm(K) +
                    squared_norm(K[:-1]) + squared_norm(K[1:]))),
            e_dual=np.sqrt(Z_0.size + 2 * Z_1.size) * tol + rtol * rho *
            np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
        )
        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u *= rho / rho_new
        U_0 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new

        #assert is_pos_def(Z_0)
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in K])
    return_list = [K, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
def _Z_0(x, A_t, nabla_t, rho_t, divisor_t):
    _A_t = A_t - x * nabla_t
    return soft_thresholding_od(_A_t, lamda=theta / (rho_t * divisor_t))
Example #12
def _Z_0(x, A_t, g_t, nabla_t, rho_t, divisor_t):
    _A_t = A_t + x * g_t * nabla_t
    A_t = 0.5 * (_A_t + _A_t.transpose(1, 0))
    return soft_thresholding_od(A_t, lamda=theta / (rho_t * divisor_t))