Example 1
def objective_kernel(theta, K, psi, kernel, times):
    psi, _, _ = check_norm_prox(psi)
    try:
        # this works if it is an ExpSineSquared or RBF kernel
        kernel = kernel(length_scale=theta)(times)
    except TypeError:
        # maybe it's a ConstantKernel
        kernel = kernel(constant_value=theta)(times)

    obj = 0
    for m in range(1, K.shape[0]):
        # all possible Markovian jumps
        obj += np.sum(np.array(list(map(psi, K[m:] - K[:-m]))) * np.diag(kernel, m))

    return obj
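
A minimal usage sketch for objective_kernel (hypothetical, not part of the original module): it assumes numpy and the helpers above (check_norm_prox) are already imported, and borrows scikit-learn's Gaussian-process kernels for the kernel argument; the toy precision matrices and length scale are illustrative only.

import numpy as np
from sklearn.gaussian_process import kernels

K_toy = np.array([np.eye(3) * (t + 1) for t in range(4)])  # four toy precision matrices
times = np.arange(4)[:, None]                              # time stamps as a column vector
# evaluate the kernel-weighted smoothness objective for length_scale = 0.5
value = objective_kernel(0.5, K_toy, "laplacian", kernels.RBF, times)
print(value)
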
    def _fit(self, emp_cov, n_samples):
        if self.kernel_psi is None:
            n_times = emp_cov.shape[0]

            if self.kernel_phi is None or callable(self.kernel_phi):
                # mimic LTGL: tridiagonal kernel with eta on the off-diagonals
                kernel_phi = np.eye(n_times)
                np.fill_diagonal(kernel_phi[:, 1:], self.eta)
                np.fill_diagonal(kernel_phi[1:], self.eta)
            else:
                kernel_phi = self.kernel_phi

            # discover best kernel parameter via EM
            # initialise precision matrices, as warm start
            self.precision_ = init_precision(emp_cov, mode=self.init)
            self.latent_ = np.zeros_like(self.precision_)
            theta_old = np.zeros(n_times * (n_times - 1) // 2)
            kernel_psi = np.eye(n_times)

            psi, _, _ = check_norm_prox(self.psi)
            if self.n_clusters is None:
                self.n_clusters = n_times

            for i in range(self.max_iter_ext):
                # E step - discover best kernel
                theta = precision_similarity(self.get_precision(), psi)

                # if i > 0 and np.linalg.norm(theta_old -
                #                             theta) / theta.size < self.eps:
                #     break

                # kernel_psi = theta * self.beta
                kernel_psi = theta
                labels_pred = AgglomerativeClustering(
                    n_clusters=self.n_clusters,
                    affinity="precomputed",
                    linkage="complete").fit_predict(kernel_psi)
                if i > 0 and np.linalg.norm(labels_pred - labels_pred_old
                                            ) / labels_pred.size < self.eps:
                    break
                kernel_psi = kernels.RBF(0.0001)(
                    labels_pred[:, None]) + kernels.RBF(self.beta)(
                        np.arange(n_times)[:, None])

                # M step - fix the kernel matrix
                out = kernel_latent_time_graphical_lasso(
                    emp_cov,
                    alpha=self.alpha,
                    tau=self.tau,
                    rho=self.rho,
                    kernel_phi=kernel_phi,
                    kernel_psi=kernel_psi,
                    n_samples=n_samples,
                    tol=self.tol,
                    rtol=self.rtol,
                    psi=self.psi,
                    max_iter=self.max_iter,
                    verbose=self.verbose,
                    return_n_iter=True,
                    return_history=self.return_history,
                    update_rho_options=self.update_rho_options,
                    compute_objective=self.compute_objective,
                    init=self.precision_,
                )

                if self.return_history:
                    (self.precision_, self.latent_, self.covariance_,
                     self.history_, self.n_iter_) = out
                else:
                    (self.precision_, self.latent_, self.covariance_,
                     self.n_iter_) = out
                theta_old = theta
                labels_pred_old = labels_pred
            else:
                warnings.warn("theta did not converge.")
            self.similarity_matrix_ = kernel_psi
        else:
            if callable(self.kernel_phi):
                try:
                    # this works if it is an ExpSineSquared or RBF kernel
                    kernel_phi = self.kernel_phi(
                        length_scale=self.ker_phi_param)(self.classes_[:,
                                                                       None])
                except TypeError:
                    # maybe it's a ConstantKernel
                    kernel_phi = self.kernel_phi(
                        constant_value=self.ker_phi_param)(self.classes_[:,
                                                                         None])

            else:
                kernel_phi = self.kernel_phi
                if kernel_phi.shape[0] != self.classes_.size:
                    raise ValueError(
                        "kernel_phi size does not match classes of samples, "
                        "got {} classes and kernel_phi has shape {}".format(
                            self.classes_.size, kernel_phi.shape[0]))
            if callable(self.kernel_psi):
                try:
                    # this works if it is an ExpSineSquared kernel
                    kernel_psi = self.kernel_psi(
                        length_scale=self.ker_psi_param)(self.classes_[:,
                                                                       None])
                except TypeError:
                    # maybe it's a ConstantKernel
                    kernel_psi = self.kernel_psi(
                        constant_value=self.ker_psi_param)(self.classes_[:,
                                                                         None])
            else:
                kernel_psi = self.kernel_psi
                if kernel_psi.shape[0] != self.classes_.size:
                    raise ValueError(
                        "kernel_psi size does not match classes of samples, "
                        "got {} classes and kernel_psi has shape {}".format(
                            self.classes_.size, kernel_psi.shape[0]))

            out = kernel_latent_time_graphical_lasso(
                emp_cov,
                alpha=self.alpha,
                tau=self.tau,
                rho=self.rho,
                kernel_phi=kernel_phi,
                kernel_psi=kernel_psi,
                n_samples=n_samples,
                tol=self.tol,
                rtol=self.rtol,
                psi=self.psi,
                max_iter=self.max_iter,
                verbose=self.verbose,
                return_n_iter=True,
                return_history=self.return_history,
                update_rho_options=self.update_rho_options,
                compute_objective=self.compute_objective,
                init=self.init,
            )
            if self.return_history:
                (self.precision_, self.latent_, self.covariance_,
                 self.history_, self.n_iter_) = out
            else:
                (self.precision_, self.latent_, self.covariance_,
                 self.n_iter_) = out

        return self
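
The E step above turns the cluster labels into a new kernel_psi by summing two RBF kernels: one over the labels with a very small length scale (so it is close to a block indicator of the clusters) and one over the time indices. A standalone sketch of that construction, with made-up labels and beta, assuming scikit-learn is available:

import numpy as np
from sklearn.gaussian_process import kernels

n_times, beta = 6, 2.0
labels_pred = np.array([0, 0, 1, 1, 2, 2])  # pretend clustering output
kernel_psi = kernels.RBF(0.0001)(labels_pred[:, None]) \
    + kernels.RBF(beta)(np.arange(n_times)[:, None])
print(np.round(kernel_psi, 2))  # close to 2 within a cluster at nearby times, smaller elsewhere
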
Example 3
def _fit_time_poisson_model(
    X,
    alpha=0.01,
    rho=1,
    kernel=None,
    max_iter=100,
    verbose=False,
    psi="laplacian",
    gamma=0.1,
    tol=1e-4,
    rtol=1e-4,
    return_history=False,
    return_n_iter=True,
    compute_objective=True,
    stop_at=None,
    stop_when=1e-4,
    update_rho_options=None,
    init="empirical",  # currently unused; K is initialised to zeros below
    n_cores=-1,
):
    """Time-varying graphical model solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(K_i, X_i) + alpha ||K_i||_{od,1}
            + sum_{s>t}^T k(s,t) Psi(K_s - K_t)

    where X is a matrix n_i x D, the observations at time i and the
    log-likelihood changes according to the distribution.

    Parameters
    ----------
    X : ndarray, shape (n_times, n_samples, n_features)
        Data matrix. It has to contain only two values, either {0, 1} or
        {-1, 1}.
    alpha : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    n_times, n_samples, n_features = X.shape
    n_samples = np.array([n_samples] * n_times)

    if kernel is None:
        kernel = np.eye(n_times)

    K = np.zeros((n_times, n_features, n_features))

    Z_M = {}
    U_M = {}
    Z_M_old = {}

    for m in range(1, n_times):
        # all possible non-Markovian jumps
        Z_L = K.copy()[:-m]
        Z_R = K.copy()[m:]
        Z_M[m] = (Z_L, Z_R)

        U_L = np.zeros_like(Z_L)
        U_R = np.zeros_like(Z_R)
        U_M[m] = (U_L, U_R)

        Z_L_old = np.zeros_like(Z_L)
        Z_R_old = np.zeros_like(Z_R)
        Z_M_old[m] = (Z_L_old, Z_R_old)

    checks = [convergence(obj=objective(X, K, Z_M, alpha, kernel, psi))]
    for iteration_ in range(max_iter):
        # update K
        A = np.zeros_like(K)
        for m in range(1, n_times):
            A[:-m] += Z_M[m][0] - U_M[m][0]
            A[m:] += Z_M[m][1] - U_M[m][1]

        A /= n_times
        A += A.transpose(0, 2, 1)
        A /= 2.0
        # K_new = np.zeros_like(K)

        for t in range(n_times):
            thetas_pred = []
            for v in range(n_features):
                inner_verbose = max(0, verbose - 1)
                res = fit_each_variable(X[t, :, :],
                                        v,
                                        alpha,
                                        tol=tol,
                                        verbose=inner_verbose,
                                        A=A[t, :, :],
                                        T=n_times,
                                        rho=rho)
                thetas_pred.append(res[0])

            K[t, :, :] = build_adjacency_matrix(thetas_pred, "union")

        # other Zs
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            A_L = K[:-m] + U_L
            A_R = K[m:] + U_R
            if not psi_node_penalty:
                prox_e = prox_psi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel, m)[:, None, None] / rho)
                Z_L = 0.5 * (A_L + A_R - prox_e)
                Z_R = 0.5 * (A_L + A_R + prox_e)
            else:
                Z_L, Z_R = prox_psi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            Z_M[m] = (Z_L, Z_R)

            # update other residuals
            U_L += K[:-m] - Z_L
            U_R += K[m:] - Z_R

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            sum(
                squared_norm(K[:-m] - Z_M[m][0]) +
                squared_norm(K[m:] - Z_M[m][1]) for m in range(1, n_times)))

        snorm = rho * np.sqrt(
            sum(
                squared_norm(Z_M[m][0] - Z_M_old[m][0]) +
                squared_norm(Z_M[m][1] - Z_M_old[m][1])
                for m in range(1, n_times)))

        obj = objective(X, K, Z_M, alpha, kernel,
                        psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=n_features * n_times * tol + rtol * max(
                np.sqrt(
                    sum(
                        squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1])
                        for m in range(1, n_times))),
                np.sqrt(
                    squared_norm(K) + sum(
                        squared_norm(K[:-m]) + squared_norm(K[m:])
                        for m in range(1, n_times))),
            ),
            e_dual=n_features * n_times * tol + rtol * rho * np.sqrt(
                sum(
                    squared_norm(U_M[m][0]) + squared_norm(U_M[m][1])
                    for m in range(1, n_times))),
        )
        for m in range(1, n_times):
            Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        # U_0 *= rho / rho_new
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            U_L *= rho / rho_new
            U_R *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [K]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
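
A hypothetical call with toy binary data, mainly to make the expected shapes explicit; it assumes the module-level helpers used in the body (fit_each_variable, build_adjacency_matrix, objective, convergence, update_rho) are importable, and the parameter values are illustrative.

import numpy as np

rng = np.random.RandomState(0)
X = rng.binomial(1, 0.5, size=(3, 50, 5)).astype(float)  # (n_times, n_samples, n_features)
K, n_iter = _fit_time_poisson_model(X, alpha=0.1, max_iter=10)
print(K.shape, n_iter)  # (3, 5, 5): one precision matrix per time point
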
Example 4
    def fit(self, X, y):
        X, y = check_X_y(X,
                         y,
                         accept_sparse=False,
                         dtype=np.float64,
                         order="C",
                         ensure_min_features=2,
                         estimator=self)

        self.classes_, n_samples = np.unique(y, return_counts=True)
        self.data = X.copy()
        if np.unique(self.data).size != 2:
            raise ValueError("With the Ising distribution the data has to "
                             "contain only two values, either {0, 1} or "
                             "{-1, 1}")
        X = np.array([X[y == cl] for cl in self.classes_])
        print(X.shape)
        if self.kernel is None:
            # from scipy.optimize import minimize
            # discover best kernel parameter via EM
            # initialise precision matrices, as warm start
            self.precision_ = np.random.rand(X.shape[0], X.shape[0])
            n_times = self.precision_.shape[0]
            kernel = np.eye(n_times)

            psi, _, _ = check_norm_prox(self.psi)
            if self.n_clusters is None:
                self.n_clusters = n_times
            labels_pred_old = 0
            for i in range(self.max_iter_ext):
                theta = precision_similarity(self.precision_, psi)
                kernel = theta
                labels_pred = AgglomerativeClustering(
                    n_clusters=self.n_clusters,
                    affinity="precomputed",
                    linkage="complete").fit_predict(kernel)
                if i > 0 and np.linalg.norm(labels_pred - labels_pred_old
                                            ) / labels_pred.size < self.eps:
                    break
                kernel = kernels.RBF(0.0001)(
                    labels_pred[:, None]) + kernels.RBF(self.beta)(
                        np.arange(n_times)[:, None])

                out = _fit_time_poisson_model(
                    X,
                    alpha=self.alpha,
                    rho=self.rho,
                    kernel=kernel,
                    tol=self.tol,
                    rtol=self.rtol,
                    psi=self.psi,
                    max_iter=self.max_iter,
                    verbose=self.verbose,
                    return_n_iter=True,
                    return_history=self.return_history,
                    update_rho_options=self.update_rho_options,
                    compute_objective=self.compute_objective,
                    init=self.precision_,
                )

                if self.return_history:
                    (self.precision_, self.history_, self.n_iter_) = out
                else:
                    self.precision_, self.n_iter_ = out
                labels_pred_old = labels_pred

            else:
                warnings.warn("theta did not converge.")
            self.similarity_matrix_ = kernel

        else:
            kernel = self.kernel
            if kernel.shape[0] != self.classes_.size:
                raise ValueError(
                    "Kernel size does not match classes of samples, "
                    "got {} classes and kernel has shape {}".format(
                        self.classes_.size, kernel.shape[0]))

            out = _fit_time_poisson_model(
                X,
                alpha=self.alpha,
                rho=self.rho,
                kernel=kernel,
                tol=self.tol,
                rtol=self.rtol,
                psi=self.psi,
                max_iter=self.max_iter,
                verbose=self.verbose,
                return_n_iter=True,
                return_history=self.return_history,
                update_rho_options=self.update_rho_options,
                compute_objective=self.compute_objective,
                init=self.precision_,
            )
            if self.return_history:
                (self.precision_, self.history_, self.n_iter_) = out
            else:
                self.precision_, self.n_iter_ = out

        return self
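
Note that the reshaping X = np.array([X[y == cl] for cl in self.classes_]) only produces a regular (n_times, n_samples, n_features) array when every class label occurs the same number of times; otherwise numpy falls back to an object array. A small standalone sketch of that step (toy data, nothing from the original class):

import numpy as np

X = np.vstack([np.zeros((10, 4)), np.ones((10, 4))])  # 20 binary samples, 4 features
y = np.repeat([0, 1], 10)                             # two time points, 10 samples each
classes_ = np.unique(y)
X_per_time = np.array([X[y == cl] for cl in classes_])
print(X_per_time.shape)  # (2, 10, 4): (n_times, n_samples_per_time, n_features)
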
def gradient_equal_time_graphical_lasso(S, K_init, max_iter, loss, C, theta,
                                        rho, mult, weights, m, eps, psi, gamma,
                                        tol, rtol, verbose, return_history,
                                        return_n_iter, mode, compute_objective,
                                        stop_at, stop_when,
                                        update_rho_options):
    """Equality constrained time-varying graphical LASSO solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T ||K_i||_{od,1} + beta sum_{i=2}^T Psi(K_i - K_{i-1})
        s.t. objective = c_i for i = 1, ..., T

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_features, n_features)
        Empirical covariance of data.
    alpha, beta : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    gamma: float, optional
        Kernel parameter when psi is chosen to be 'kernel'.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    if loss == 'LL':
        loss_func = neg_logl
    else:
        loss_func = dtrace

    T = S.shape[0]
    I = np.eye(S.shape[1])

    Z_0 = K_init

    out_obj = []

    checks = [convergence(obj=penalty_objective(Z_0, Z_0, Z_0, psi, theta))]

    def _Z_0(x1, x2, Z_0, loss_res, nabla_con, nabla_pen):
        A = Z_0 - x2 * (1 - theta) * nabla_pen
        # A = Z_0 - x1 * nabla_con - x2 * (1 - theta) * nabla_pen
        A -= x1 * loss_res[:, None, None] * nabla_con
        return soft_thresholding_od(A, lamda=x2 * theta), A

    # constrained optimisation via line search
    def _f(x, _Z_0, Z_0, loss_res, nabla_con, nabla_pen, loss_func, S, C):
        _Z_0, A = _Z_0(x[0], x[1], Z_0, loss_res, nabla_con, nabla_pen)
        loss_res = loss_gen(loss_func, S, _Z_0) - C
        # loss_res_A = loss_gen(loss_func, S, A) - C
        # return squared_norm(loss_res) + squared_norm(loss_res - loss_res_A)
        return squared_norm(loss_res) + squared_norm(_Z_0 - A) / (S.shape[1] *
                                                                  S.shape[2])

    loss_res = loss_gen(loss_func, S, Z_0) - C

    for iteration_ in range(max_iter):
        if loss_func.__name__ == 'neg_logl':
            nabla_con = np.array(
                [S_t - np.linalg.inv(A_t) for (S_t, A_t) in zip(S, Z_0)])
            # nabla = np.array([S_t - np.linalg.inv(Z_0_t) for (S_t, Z_0_t) in zip(S, Z_0_pre)])
        elif loss_func.__name__ == 'dtrace':
            nabla_con = np.array([(2 * A_t @ S_t - I)
                                  for (S_t, A_t) in zip(S, Z_0)])
            # nabla = np.array([(2 * Z_0_t @ S_t - I) for (S_t, Z_0_t) in zip(S, Z_0_pre)])

        nabla_pen = grad_laplacian(Z_0)

        out = minimize(partial(_f,
                               _Z_0=_Z_0,
                               Z_0=Z_0,
                               loss_res=loss_res,
                               nabla_con=nabla_con,
                               nabla_pen=nabla_pen,
                               loss_func=loss_func,
                               S=S,
                               C=C),
                       x0=np.zeros(2),
                       method='Nelder-Mead',
                       tol=1e-4)
        Z_0, _ = _Z_0(out.x[0], out.x[1], Z_0, loss_res, nabla_con, nabla_pen)
        loss_res = loss_gen(loss_func, S, Z_0) - C
        out_obj.append(penalty_objective(Z_0, Z_0[:-1], Z_0[1:], psi, theta))
        if not iteration_ % 100:
            print(iteration_)
            print(np.max(loss_res), np.mean(loss_res))
            print(out_obj[-1])
        # print(out_obj[-1], np.max(loss_res), np.mean(loss_res))
    else:
        warnings.warn("Objective did not converge.")

    print(iteration_, out_obj[-1])
    # print(check.rnorm, check.e_pri)
    # print(check.snorm, check.e_dual)

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
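
A hypothetical invocation with toy covariances, mainly to document the all-positional signature; C would normally hold per-time objective targets and is set to zeros here purely for illustration, and the helpers referenced in the body (neg_logl, dtrace, loss_gen, grad_laplacian, penalty_objective, soft_thresholding_od, convergence) are assumed to come from the surrounding module.

import numpy as np

T, d = 4, 5
rng = np.random.RandomState(0)
S = np.array([np.cov(rng.randn(50, d), rowvar=False) for _ in range(T)])
K_init = np.array([np.eye(d)] * T)
K, cov, n_iter = gradient_equal_time_graphical_lasso(
    S, K_init, max_iter=50, loss='LL', C=np.zeros(T), theta=0.5, rho=1.,
    mult=2., weights=[None], m=50, eps=1e-2, psi='laplacian', gamma=0.1,
    tol=1e-4, rtol=1e-4, verbose=False, return_history=False,
    return_n_iter=True, mode=None, compute_objective=True, stop_at=None,
    stop_when=1e-4, update_rho_options=None)
print(K.shape)  # (4, 5, 5)
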
Example 6
def latent_time_matrix_decomposition(emp_cov,
                                     alpha=0.01,
                                     tau=1.,
                                     rho=1.,
                                     beta=1.,
                                     eta=1.,
                                     max_iter=100,
                                     verbose=False,
                                     psi='laplacian',
                                     phi='laplacian',
                                     mode='admm',
                                     tol=1e-4,
                                     rtol=1e-4,
                                     assume_centered=False,
                                     return_history=False,
                                     return_n_iter=True,
                                     update_rho_options=None,
                                     compute_objective=True):
    r"""Latent variable time-varying matrix decomposition solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T ||S_i - (K_i - L_i)||^2 + alpha ||K_i||_{od,1}
            + tau ||L_i||_*
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})
            + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S is the matrix to decompose.

    Parameters
    ----------
    emp_cov : ndarray, shape (n_features, n_features)
        Matrix to decompose.
    alpha, tau, beta, eta : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)

    Z_0 = np.zeros_like(emp_cov)
    Z_1 = np.zeros_like(Z_0)[:-1]
    Z_2 = np.zeros_like(Z_0)[1:]
    W_0 = np.zeros_like(Z_0)
    W_1 = np.zeros_like(Z_1)
    W_2 = np.zeros_like(Z_2)

    X_0 = np.zeros_like(Z_0)
    X_1 = np.zeros_like(Z_1)
    X_2 = np.zeros_like(Z_2)
    U_1 = np.zeros_like(W_1)
    U_2 = np.zeros_like(W_2)

    R_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)
    W_1_old = np.zeros_like(W_1)
    W_2_old = np.zeros_like(W_2)

    # divisor for consensus variables: one less matrix at t = 0 and t = T
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        R = (rho * A + 2 * emp_cov) / (2 + rho)

        # update Z_0
        A = R + W_0 + X_0
        A[:-1] += Z_1 - X_1
        A[1:] += Z_2 - X_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # Z_0 = np.array(map(soft_thresholding_, A))
        Z_0 = soft_thresholding(A,
                                lamda=alpha / (rho * divisor[:, None, None]))

        # update Z_1, Z_2
        A_1 = Z_0[:-1] + X_1
        A_2 = Z_0[1:] + X_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * beta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update W_0
        A = Z_0 - R - X_0
        A[:-1] += W_1 - U_1
        A[1:] += W_2 - U_2
        A /= divisor[:, None, None]
        A += A.transpose(0, 2, 1)
        A /= 2.

        W_0 = np.array([
            prox_trace_indicator(a, lamda=tau / (rho * div))
            for a, div in zip(A, divisor)
        ])

        # update W_1, W_2
        A_1 = W_0[:-1] + U_1
        A_2 = W_0[1:] + U_2
        if not phi_node_penalty:
            prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
            W_1 = .5 * (A_1 + A_2 - prox_e)
            W_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            W_1, W_2 = prox_phi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * eta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        X_0 += R - Z_0 + W_0
        X_1 += Z_0[:-1] - Z_1
        X_2 += Z_0[1:] - Z_2
        U_1 += W_0[:-1] - W_1
        U_2 += W_0[1:] - W_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + squared_norm(Z_0[:-1] - Z_1) +
            squared_norm(Z_0[1:] - Z_2) + squared_norm(W_0[:-1] - W_1) +
            squared_norm(W_0[1:] - W_2))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old) + squared_norm(W_1 - W_1_old) +
            squared_norm(W_2 - W_2_old))

        obj = objective(emp_cov, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
                        alpha, tau, beta, eta, psi, phi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(R) + squared_norm(Z_1) + squared_norm(Z_2) +
                    squared_norm(W_1) + squared_norm(W_2)),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + squared_norm(Z_0[:-1]) +
                    squared_norm(Z_0[1:]) + squared_norm(W_0[:-1]) +
                    squared_norm(W_0[1:]))),
            e_dual=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * rho *
            (np.sqrt(
                squared_norm(X_0) + squared_norm(X_1) + squared_norm(X_2) +
                squared_norm(U_1) + squared_norm(U_2))))

        R_old = R.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()
        W_1_old = W_1.copy()
        W_2_old = W_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        X_1 *= rho / rho_new
        X_2 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [Z_0, W_0]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
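
A minimal usage sketch (hypothetical; toy symmetric matrices and illustrative regularisation values) showing the sparse plus low-rank split returned by the solver:

import numpy as np

T, d = 5, 6
rng = np.random.RandomState(0)
S = np.array([np.cov(rng.randn(30, d), rowvar=False) for _ in range(T)])
K, L, n_iter = latent_time_matrix_decomposition(S, alpha=0.1, tau=0.5,
                                                beta=0.1, eta=0.1, max_iter=200)
print(K.shape, L.shape, n_iter)  # sparse part, low-rank part, iterations used
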
Example 7
def time_graphical_lasso(
    emp_cov,
    alpha=0.01,
    rho=1,
    beta=1,
    max_iter=100,
    n_samples=None,
    verbose=False,
    psi="laplacian",
    tol=1e-4,
    rtol=1e-4,
    return_history=False,
    return_n_iter=True,
    mode="admm",
    compute_objective=True,
    stop_at=None,
    stop_when=1e-4,
    update_rho_options=None,
    init="empirical",
):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha*||K_i||_{od,1}
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_features, n_features)
        Empirical covariance of data.
    alpha, beta : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]  # np.zeros_like(emp_cov)[:-1]
    Z_2 = Z_0.copy()[1:]  # np.zeros_like(emp_cov)[1:]

    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    # divisor for consensus variables: one less matrix at t = 0 and t = T
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = [convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_1, Z_2, alpha, beta, psi))]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        A[:-1] += Z_1 - U_1
        A[1:] += Z_2 - U_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # K = np.array(map(soft_thresholding_, A))
        A += A.transpose(0, 2, 1)
        A /= 2.0

        A *= -rho * divisor[:, None, None] / n_samples[:, None, None]
        A += emp_cov

        K = np.array([prox_logdet(a, lamda=ni / (rho * div)) for a, div, ni in zip(A, divisor, n_samples)])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        Z_0 = soft_thresholding(A, lamda=alpha / rho)

        # other Zs
        A_1 = K[:-1] + U_1
        A_2 = K[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2.0 * beta / rho)
            Z_1 = 0.5 * (A_1 + A_2 - prox_e)
            Z_2 = 0.5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(
                np.concatenate((A_1, A_2), axis=1),
                lamda=0.5 * beta / rho,
                rho=rho,
                tol=tol,
                rtol=rtol,
                max_iter=max_iter,
            )

        # update residuals
        U_0 += K - Z_0
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(K - Z_0) + squared_norm(K[:-1] - Z_1) + squared_norm(K[1:] - Z_2))

        snorm = rho * np.sqrt(squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) + squared_norm(Z_2 - Z_2_old))

        obj = objective(n_samples, emp_cov, Z_0, K, Z_1, Z_2, alpha, beta, psi) if compute_objective else np.nan

        # if np.isinf(obj):
        #     Z_0 = Z_0_old
        #     break

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(K.size + 2 * Z_1.size) * tol
            + rtol
            * max(
                np.sqrt(squared_norm(Z_0) + squared_norm(Z_1) + squared_norm(Z_2)),
                np.sqrt(squared_norm(K) + squared_norm(K[:-1]) + squared_norm(K[1:])),
            ),
            e_dual=np.sqrt(K.size + 2 * Z_1.size) * tol
            + rtol * rho * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
            # precision=Z_0.copy()
        )
        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new

        # assert is_pos_def(Z_0)
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
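
A hypothetical end-to-end call on a stack of empirical covariances (toy data; init_precision, prox_logdet and the other helpers are assumed to be the module's own):

import numpy as np

T, d, n = 4, 6, 100
rng = np.random.RandomState(42)
emp_cov = np.array([np.cov(rng.randn(n, d), rowvar=False) for _ in range(T)])
K, covariance, n_iter = time_graphical_lasso(
    emp_cov, alpha=0.1, beta=1., max_iter=200, n_samples=np.full(T, n))
print(K.shape)  # (4, 6, 6): one precision matrix per time point

The snippet that follows is a separate fragment, apparently from a benchmarking script: it fits a GradientEqualTimeGraphicalLasso estimator, times the fit, and re-evaluates precision matrices thresholded at 1e-4 and 0.
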
    tgl_g = GradientEqualTimeGraphicalLasso(max_iter=max_iter,
                                            loss=loss,
                                            c_level=c_level,
                                            theta=theta,
                                            rho=rho,
                                            mult=mult,
                                            weights=weights,
                                            m=m,
                                            eps=eps,
                                            psi=psi)
    emp_inv_score_g, baseline_score_g, fit_score_g, pre_g = tgl_g.fit_cov(
        X_cov).eval_cov_pre()
    toc = time.perf_counter()
    print('Gradient Running Time :{}'.format(toc - tic))

    psi, prox_psi, psi_node_penalty = check_norm_prox(tgl_g.psi)

    pre_tgl = {}
    fit_score_tgl_thres = {}
    for i in [1e-4, 0]:
        pre_tgl[i] = np.array([k * (np.abs(k) >= i) for k in pre_])
        tgl_g.precision_ = pre_tgl[i]
        emp_inv_score, baseline_score, fit_score_tgl_thres[
            i], _ = tgl_g.eval_cov_pre()
        print(
            'Vanilla Objective',
            penalty_objective(pre_tgl[i], pre_tgl[i][:-1], pre_tgl[i][1:], psi,
                              tgl_g.theta))

    pre = {}
    fit_score_thres = {}
def taylor_time_graphical_lasso(
    S, K_init, max_iter, loss, C, theta, rho, mult, 
    weights, m, eps, psi, gamma, tol, rtol, verbose, 
    return_history, return_n_iter, mode, compute_objective, 
    stop_at, stop_when, update_rho_options
    ):
    """Equality constrained time-varying graphical LASSO solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T ||K_i||_{od,1} + beta sum_{i=2}^T Psi(K_i - K_{i-1})
        s.t. objective = c_i for i = 1, ..., T

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_features, n_features)
        Empirical covariance of data.
    alpha, beta : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    gamma: float, optional
        Kernel parameter when psi is chosen to be 'kernel'.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    if loss == 'LL':
        loss_func = neg_logl
    else:
        loss_func = dtrace

    T = S.shape[0]
    S_flat = S.copy().reshape(T, S.shape[1] * S.shape[2])
    I_flat = np.eye(S.shape[1]).ravel()

    K = K_init.copy()
    Z_0 = K_init.copy()
    Z_1 = Z_0.copy()[:-1] 
    Z_2 = Z_0.copy()[1:]  

    u = np.zeros(T)
    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = Z_0.copy()
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    # divisor for consensus variables, accounting for one less matrix for t = 0 and t = T
    divisor = np.full(T, 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    rho = rho * np.ones(T)    
    if weights[0] is not None:
        if weights[0] == 'rbf':
            weights = rbf_weights(T, weights[1], mult)
        elif weights[0] == 'exp':
            weights = exp_weights(T, weights[1], mult)
        elif weights[0] == 'lin':
            weights = lin_weights(T, weights[1], mult)
        con_obj = {}
        for t in range(T):
            con_obj[t] = []

    con_obj_mean = []
    con_obj_max = []

    # loss residuals
    loss_res = np.zeros(T)
    loss_init = loss_gen(loss_func, S, Z_0_old)
    loss_res_old = loss_init - C

    # loss_diff = C - loss_init
    # C_  = C - loss_diff

    out_obj = []

    checks = [
        convergence(
            obj=penalty_objective(Z_0, Z_1, Z_2, psi, theta))
    ]


    def _K(x, A_t, g_t, nabla_t, nabla_t_T_A_t, nabla_t_T_nabla_t, rho_t, divisor_t):
        _K_t = (A_t + x * g_t * nabla_t - 
                    (x * nabla_t_T_A_t + x ** 2 * g_t * nabla_t_T_nabla_t) * nabla_t  / 
                    (divisor_t * rho_t + x * nabla_t_T_nabla_t)
                ).reshape(S.shape[1], S.shape[2])
        _K_t /= (rho_t * divisor_t)
        return 0.5 * (_K_t + _K_t.transpose(1, 0))


    # def _K(x, A_t, nabla_t):
    #     _A_t = A_t - x * nabla_t
    #     return _A_t


    # constrained optimisation via line search
    def _f(x, _K, A_t, g_t, nabla_t, nabla_t_T_A_t, nabla_t_T_nabla_t, rho_t, divisor_t, 
            loss_func, S_t, c_t, loss_res_old_t, nabla_t_T_K_old_t):
        _K_t = _K(x, A_t, g_t, nabla_t, nabla_t_T_A_t, nabla_t_T_nabla_t, rho_t, divisor_t)
        loss_res_t = loss_func(S_t, _K_t) - c_t
        return loss_res_t ** 2 + (loss_res_t - loss_res_old_t - nabla_t @ _K_t.ravel() + nabla_t_T_K_old_t) ** 2


    # # constrained optimisation via line search
    # def _f(x, _K, A_t, nabla_t, loss_func, S_t, c_t, loss_res_old_t):
    #     _K_t = _K(x, A_t, nabla_t)
    #     loss_res_t = loss_func(S_t, _K_t) - c_t
    #     return loss_res_t ** 2 + (loss_res_t - loss_res_old_t - np.sum(nabla_t * (_K_t - A_t))) ** 2


    for iteration_ in range(max_iter):
        # update K
        A = rho[:, None, None] * (Z_0 - U_0)
        A[:-1] += rho[:-1, None, None] * (Z_1 - U_1)
        A[1:] += rho[1:, None, None] * (Z_2 - U_2)
        # A += A.transpose(0, 2, 1)
        # A /= 2. 
        # A /= (rho * divisor)[:, None, None]

        # loss_res_pre = loss_gen(loss_func, S, A) - C

        if loss_func.__name__ == 'neg_logl':
            nabla = np.array([S_t - np.linalg.inv(K_t).ravel() for (S_t, K_t) in zip(S_flat, K)])
            # nabla = np.array([S_t - np.linalg.inv(K_t) for (S_t, K_t) in zip(S, A)])
        elif loss_func.__name__ == 'dtrace': 
            nabla = np.array([(2 * K_t.ravel() @ S_t - I_flat) for (S_t, K_t) in zip(S_flat, K)])
            # nabla = np.array([(2 * K_t @ S_t - I) for (S_t, K_t) in zip(S, K)])
        nabla_T_K_old = np.array([nabla_t @ K_t.ravel() for (nabla_t, K_t) in zip(nabla, K)])
        # nabla_T_K_old = np.array([np.sum(nabla_t * K_t) for (nabla_t, K_t) in zip(nabla, K)])
        g = nabla_T_K_old - loss_res_old
        nabla_T_A = np.array([nabla_t @ A_t.ravel() for (nabla_t, A_t) in zip(nabla, A)])
        nabla_T_nabla =  np.einsum('ij,ij->i', nabla, nabla)
        
        if iteration_ == 0:
            nabla = np.zeros_like(S_flat)
            # nabla = np.zeros_like(S)
            nabla_T_K_old = np.zeros(T)
            g = np.zeros(T)
            nabla_T_A = np.zeros(T)
            nabla_T_nabla = np.zeros(T)

        col = []

        for t in range(T):
            out = minimize_scalar(
                    partial(_f, _K=_K, A_t=A[t].ravel(), g_t=g[t], nabla_t=nabla[t], 
                            nabla_t_T_A_t=nabla_T_A[t], nabla_t_T_nabla_t=nabla_T_nabla[t], 
                            rho_t=rho[t], divisor_t=divisor[t], loss_func=loss_func, 
                            S_t=S[t], c_t=C[t], loss_res_old_t=loss_res_old[t], 
                            nabla_t_T_K_old_t=nabla_T_K_old[t])
                    )
            # out = minimize_scalar(
            #         partial(_f, _K=_K, A_t=A[t], nabla_t=nabla[t], loss_func=loss_func, 
            #                 S_t=S[t], c_t=C[t], loss_res_old_t=loss_res_pre[t])
            #         )
            K[t] = _K(out.x, A[t].ravel(), g[t], nabla[t], nabla_T_A[t], nabla_T_nabla[t], rho[t], divisor[t])
            # K[t] = _K(out.x, A[t], nabla[t])
            loss_res[t] = loss_func(S[t], K[t]) - C[t]
            # u[t] += loss_res[t]    
            if weights[0] is not None:
                con_obj[t].append(loss_res[t] ** 2)    
                if len(con_obj[t]) > m and np.mean(con_obj[t][-m:-int(m/2)]) < np.mean(con_obj[t][-int(m/2):]) and loss_res[t] > eps:
                    col.append(t)
                
        # update Z_0
        _Z_0 = K + U_0
        _Z_0 += _Z_0.transpose(0, 2, 1)
        _Z_0 /= 2.
        Z_0 = soft_thresholding_od(_Z_0, lamda=theta / rho[:, None, None])
                
        # update Z_1, Z_2
        A_1 = Z_0[:-1] + U_1
        A_2 = Z_0[1:] + U_2
        if not psi_node_penalty:
            A_add = A_2 + A_1
            A_sub = A_2 - A_1
            prox_e_1 = prox_psi(A_sub, lamda=2. * (1 - theta) / rho[:-1, None, None])
            prox_e_2 = prox_psi(A_sub, lamda=2. * (1 - theta) / rho[1:, None, None])
            Z_1 = .5 * (A_add - prox_e_1)
            Z_2 = .5 * (A_add + prox_e_2)
        # TODO: Fix for rho vector
        # else:
        #     if weights is not None:
        #         Z_1, Z_2 = prox_psi(
        #             np.concatenate((A_1, A_2), axis=1), lamda=.5 * (1 - theta) / rho[t],
        #             rho=rho[t], tol=tol, rtol=rtol, max_iter=max_iter)

        # update residuals
        con_obj_mean.append(np.mean(loss_res) ** 2)
        con_obj_max.append(np.max(loss_res))

        U_0 += K - Z_0 
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + 
            squared_norm(K[:-1] - Z_1) +
             squared_norm(K[1:] - Z_2)
            )

        loss_res_old = loss_res.copy()
        
        snorm = np.sqrt(
                    squared_norm(rho[:, None, None] * (Z_0 - Z_0_old)) + 
                    squared_norm(rho[:-1, None, None] * (Z_1 - Z_1_old)) + 
                    squared_norm(rho[1:, None, None] * (Z_2 - Z_2_old))
                )

        e_dual = np.sqrt(Z_0.size + 2 * Z_1.size) * tol + rtol * np.sqrt(
                    squared_norm(rho[:, None, None] * U_0) + 
                    squared_norm(rho[:-1, None, None] * U_1) + 
                    squared_norm(rho[1:, None, None] * U_2)
                 )

        obj = objective(loss_res, Z_0, Z_1, Z_2, psi, theta)

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(loss_res.size + Z_0.size + 2 * Z_1.size) * tol + rtol * 
                (
                max(np.sqrt(squared_norm(Z_0)), np.sqrt(squared_norm(K))) +
                max(np.sqrt(squared_norm(Z_1)), np.sqrt(squared_norm(K[:-1]))) + 
                max(np.sqrt(squared_norm(Z_2)), np.sqrt(squared_norm(K[1:])))
                ),
            e_dual=e_dual
        )

        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print(
                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        out_obj.append(penalty_objective(Z_0, Z_0[:-1], Z_0[1:], psi, theta))
        if not iteration_ % 100:
            print(iteration_)
            print(np.max(con_obj_max[-1]), np.mean(loss_res))
            print(out_obj[-1])
        checks.append(check)

        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

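        # Restart heuristic: if the mean squared constraint violation over the
        # most recent m/2 iterations exceeds the mean over the m/2 before it,
        # and the violation is still above eps, increase rho (globally, or per
        # time point when weights are given) and rescale the scaled duals.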
        if weights[0] is None:
            if len(con_obj_mean) > m:
                if np.mean(con_obj_mean[-m:-int(m/2)]) < np.mean(con_obj_mean[-int(m/2):]) and np.max(loss_res) > eps:
                # or np.mean(con_obj_max[-100:-50]) < np.mean(con_obj_max[-50:])) # np.mean(loss_res) > 0.25:
                    print("Rho Mult", mult * rho[0], iteration_, np.mean(loss_res), con_obj_max[-1])
                    # loss_diff /= 5            
                    # C_ = C - loss_diff           
                    # resscale scaled dual variables
                    rho = mult * rho
                    # u /= mult
                    U_0 /= mult
                    U_1 /= mult
                    U_2 /= mult
                    con_obj_mean = []
                    con_obj_max = []
        else:
            for t in col:
                rho *= weights[t]
                # u /= weights[t]
                U_0 /= weights[t][:, None, None]
                U_1 /= weights[t][:-1, None, None]
                U_2 /= weights[t][1:, None, None]
                con_obj[t] = []
                print('Mult', iteration_, t, rho[t])    
    else:
        warnings.warn("Objective did not converge.")

    print(iteration_, out_obj[-1])
    # print(out_obj)
    print(check.rnorm, check.e_pri)
    print(check.snorm, check.e_dual)

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
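
The K update above performs, for each time point, a scalar line search by building a closure with functools.partial and handing it to scipy's minimize_scalar. A toy sketch of just that pattern, with no regain code involved:

from functools import partial
from scipy.optimize import minimize_scalar

def toy_objective(x, target):
    # stand-in for _f: a one-dimensional objective parameterised by fixed data
    return (x - target) ** 2

out = minimize_scalar(partial(toy_objective, target=3.0))
print(out.x)  # approximately 3.0
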
def latent_time_graph_lasso(
        emp_cov, alpha=1, tau=1, rho=1, beta=1., eta=1., max_iter=1000,
        verbose=False, psi='laplacian', phi='laplacian', mode=None,
        tol=1e-4, rtol=1e-2, assume_centered=False,
        return_history=False, return_n_iter=True):
    r"""Time-varying latent variable graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(K_i-L_i) + alpha ||K_i||_{od,1}
            + tau ||L_i||_*
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})
            + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S is the empirical covariance of the data
    matrix D (training observations by features).

    Parameters
    ----------
    data_list : list of 2-dimensional matrices.
        Input matrices.
    alpha, tau : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.
    """
    psi, prox_psi, _ = check_norm_prox(psi)
    phi, prox_phi, _ = check_norm_prox(phi)

    # S = np.array(map(empirical_covariance, data_list))
    # n_samples = np.array([s for s in [1.]])

    K = np.zeros_like(emp_cov)
    Z_0 = np.zeros_like(K)
    Z_1 = np.zeros_like(K)[:-1]
    Z_2 = np.zeros_like(K)[1:]
    W_0 = np.zeros_like(K)
    W_1 = np.zeros_like(K)[:-1]
    W_2 = np.zeros_like(K)[1:]
    X_0 = np.zeros_like(K)
    X_1 = np.zeros_like(K)[:-1]
    X_2 = np.zeros_like(K)[1:]

    Z_consensus = np.zeros_like(K)
    # Z_consensus_old = np.zeros_like(K)
    W_consensus = np.zeros_like(K)
    # W_consensus_old = np.zeros_like(K)
    R_old = np.zeros_like(K)

    # divisor for consensus variables, accounting for two less matrices
    divisor = np.full(K.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        A[:-1] += Z_1 - W_1 - X_1
        A[1:] += Z_2 - W_2 - X_2
        A /= divisor[:, None, None]

        # A += np.array(map(np.transpose, A))
        # A /= 2.

        # A *= - rho / n_samples[:, None, None]
        A *= - rho
        A += emp_cov

        R = np.array([prox_logdet(a, lamda=1. / rho) for a in A])

        # update Z_0
        # Zold = Z
        # X_hat = alpha * X + (1 - alpha) * Zold
        soft_thresholding_ = partial(soft_thresholding_sign, lamda=alpha / rho)
        Z_0 = np.array(list(map(soft_thresholding_, R + W_0 + X_0)))

        # update Z_1, Z_2
        # prox_l = partial(prox_laplacian, beta=2. * beta / rho)
        # prox_e = np.array(map(prox_l, K[1:] - K[:-1] + U_2 - U_1))
        if beta != 0:
            A_1 = R[:-1] + W_1 + X_1
            # A_1 = Z_0[:-1].copy()
            A_2 = R[1:] + W_2 + X_2
            # A_2 = Z_0[1:].copy()
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1 = Z_0[:-1].copy()
            Z_2 = Z_0[1:].copy()

        # update W_0
        A = Z_0 - R - X_0
        W_0 = np.array(list(map(partial(prox_trace_indicator, lamda=tau / rho), A)))

        # update W_1, W_2
        if eta != 0:
            A_1 = Z_1 - R[:-1] - X_1
            # A_1 = W_0[:-1].copy()
            A_2 = Z_2 - R[1:] - X_2
            # A_2 = W_0[1:].copy()
            prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
            W_1 = .5 * (A_1 + A_2 - prox_e)
            W_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            W_1 = W_0[:-1].copy()
            W_2 = W_0[1:].copy()

        # update residuals
        X_0 += R - Z_0 + W_0
        X_1 += R[:-1] - Z_1 + W_1
        X_2 += R[1:] - Z_2 + W_2

        # diagnostics, reporting, termination checks
        X_consensus = X_0.copy()
        X_consensus[:-1] += X_1
        X_consensus[1:] += X_2
        X_consensus /= divisor[:, None, None]

        Z_consensus = Z_0.copy()
        Z_consensus[:-1] += Z_1
        Z_consensus[1:] += Z_2
        Z_consensus /= divisor[:, None, None]

        W_consensus = W_0.copy()
        W_consensus[:-1] += W_1
        W_consensus[1:] += W_2
        W_consensus /= divisor[:, None, None]

        check = convergence(
            obj=objective(emp_cov, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
                          alpha, tau, beta, eta, psi, phi),
            rnorm=np.linalg.norm(R - Z_consensus + W_consensus),
            snorm=np.linalg.norm(rho * (R - R_old)),
            e_pri=np.sqrt(np.prod(K.shape)) * tol + rtol * max(
                np.linalg.norm(R),
                np.sqrt(squared_norm(Z_consensus) - squared_norm(W_consensus))),
            e_dual=np.sqrt(np.prod(K.shape)) * tol + rtol * np.linalg.norm(
                rho * X_consensus)
        )
        R_old = R.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        # if iteration_ % 10 == 0:
        #     rho = rho * 0.8
    else:
        warnings.warn("Objective did not converge.")

    # return_list = [Z_consensus, W_consensus, emp_cov]
    return_list = [Z_consensus, W_0, W_1, W_2, emp_cov]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
def kernel_time_graphical_lasso(
    emp_cov,
    alpha=0.01,
    rho=1,
    kernel=None,
    max_iter=100,
    n_samples=None,
    verbose=False,
    psi="laplacian",
    tol=1e-4,
    rtol=1e-4,
    return_history=False,
    return_n_iter=True,
    mode="admm",
    update_rho_options=None,
    compute_objective=True,
    stop_at=None,
    stop_when=1e-4,
    init="empirical",
):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha ||K_i||_{od,1}
            + sum_{s>t}^T k_psi(s,t) Psi(K_s - K_t)

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of the data
    matrix X_i (training observations by features) at time i.

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha : float, optional
        Regularisation parameter.
    kernel : ndarray, shape (n_times, n_times), optional
        Temporal similarity matrix weighting the Psi penalties between
        pairs of time points. Defaults to the identity (no coupling).
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    n_times, _, n_features = emp_cov.shape

    if kernel is None:
        kernel = np.eye(n_times)

    Z_0 = init_precision(emp_cov, mode=init)
    U_0 = np.zeros_like(Z_0)
    Z_0_old = np.zeros_like(Z_0)

    Z_M, Z_M_old = {}, {}
    U_M = {}
    for m in range(1, n_times):
        # all possible markovians jumps
        Z_L = Z_0.copy()[:-m]
        Z_R = Z_0.copy()[m:]
        Z_M[m] = (Z_L, Z_R)

        U_L = np.zeros_like(Z_L)
        U_R = np.zeros_like(Z_R)
        U_M[m] = (U_L, U_R)

        Z_L_old = np.zeros_like(Z_L)
        Z_R_old = np.zeros_like(Z_R)
        Z_M_old[m] = (Z_L_old, Z_R_old)

    if n_samples is None:
        n_samples = np.ones(n_times)

    checks = [
        convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_M, alpha,
                                  kernel, psi))
    ]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        for m in range(1, n_times):
            A[:-m] += Z_M[m][0] - U_M[m][0]
            A[m:] += Z_M[m][1] - U_M[m][1]

        A /= n_times
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # K = np.array(map(soft_thresholding_, A))
        A += A.transpose(0, 2, 1)
        A /= 2.0

        A *= -rho * n_times / n_samples[:, None, None]
        A += emp_cov

        K = np.array([
            prox_logdet(a, lamda=ni / (rho * n_times))
            for a, ni in zip(A, n_samples)
        ])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        Z_0 = soft_thresholding(A, lamda=alpha / rho)

        # update residuals
        U_0 += K - Z_0

        # other Zs
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            A_L = K[:-m] + U_L
            A_R = K[m:] + U_R
            if not psi_node_penalty:
                prox_e = prox_psi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel, m)[:, None, None] / rho)
                Z_L = 0.5 * (A_L + A_R - prox_e)
                Z_R = 0.5 * (A_L + A_R + prox_e)
            else:
                Z_L, Z_R = prox_psi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            Z_M[m] = (Z_L, Z_R)

            # update other residuals
            U_L += K[:-m] - Z_L
            U_R += K[m:] - Z_R

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + sum(
                squared_norm(K[:-m] - Z_M[m][0]) +
                squared_norm(K[m:] - Z_M[m][1]) for m in range(1, n_times)))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + sum(
                squared_norm(Z_M[m][0] - Z_M_old[m][0]) +
                squared_norm(Z_M[m][1] - Z_M_old[m][1])
                for m in range(1, n_times)))

        obj = objective(n_samples, emp_cov, Z_0, K, Z_M, alpha, kernel,
                        psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=n_features * n_times * tol + rtol * max(
                np.sqrt(
                    squared_norm(Z_0) + sum(
                        squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1])
                        for m in range(1, n_times))),
                np.sqrt(
                    squared_norm(K) + sum(
                        squared_norm(K[:-m]) + squared_norm(K[m:])
                        for m in range(1, n_times))),
            ),
            e_dual=n_features * n_times * tol + rtol * rho * np.sqrt(
                squared_norm(U_0) + sum(
                    squared_norm(U_M[m][0]) + squared_norm(U_M[m][1])
                    for m in range(1, n_times))),
        )
        Z_0_old = Z_0.copy()
        for m in range(1, n_times):
            Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            U_L *= rho / rho_new
            U_R *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
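A minimal usage sketch follows (not part of the original example): it builds one empirical covariance per time point, a temporal RBF similarity kernel, and calls the solver defined above. The data shapes and hyper-parameter values are illustrative, and the sketch assumes the regain helpers the solver relies on (init_precision, check_norm_prox, objective, prox_logdet, soft_thresholding, update_rho, convergence) are importable in the current namespace.

import numpy as np
from sklearn.covariance import empirical_covariance
from sklearn.gaussian_process import kernels

n_times, n_per_time, n_features = 5, 50, 10
rng = np.random.RandomState(0)

# one empirical covariance per time point
emp_cov = np.array([
    empirical_covariance(rng.randn(n_per_time, n_features))
    for _ in range(n_times)
])

# temporal similarity kernel: nearby time points share similar precisions
kernel = kernels.RBF(length_scale=1.0)(np.arange(n_times)[:, None])

K, covariance, n_iter = kernel_time_graphical_lasso(
    emp_cov,
    alpha=0.1,
    kernel=kernel,
    n_samples=np.full(n_times, n_per_time),
    psi="laplacian",
    max_iter=200,
)
print(K.shape)  # (5, 10, 10): one precision matrix per time point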
    def _fit(self, emp_cov, n_samples):
        if self.kernel is None:
            # from scipy.optimize import minimize
            # discover best kernel parameter via EM
            # initialise precision matrices, as warm start
            self.precision_ = init_precision(emp_cov, mode=self.init)
            n_times = self.precision_.shape[0]
            theta_old = np.zeros(n_times * (n_times - 1) // 2)
            # idx = np.triu_indices(n_times, 1)
            kernel = np.eye(n_times)

            psi, _, _ = check_norm_prox(self.psi)
            if self.n_clusters is None:
                self.n_clusters = n_times

            for i in range(self.max_iter_ext):
                # E step - discover best kernel
                # , method='bounded'bounds=[(0, None)]*theta_old.size
                # theta = minimize(
                #     objective_similarity, theta_old,
                #     args=(self.precision_, self.classes_[:, None], psi)
                #     ).x
                # theta -= np.min(theta)
                # theta /= np.max(theta)
                theta = precision_similarity(self.precision_, psi)

                # if i > 0 and np.linalg.norm(theta_old -
                #                             theta) / theta.size < self.eps:
                #     break

                # kernel[idx] = theta
                # kernel[idx[::-1]] = theta
                kernel = theta

                labels_pred = AgglomerativeClustering(
                    n_clusters=self.n_clusters,
                    affinity="precomputed",
                    linkage="complete").fit_predict(kernel)
                if i > 0 and np.linalg.norm(labels_pred - labels_pred_old
                                            ) / labels_pred.size < self.eps:
                    break
                kernel = kernels.RBF(0.0001)(
                    labels_pred[:, None]) + kernels.RBF(self.beta)(
                        np.arange(n_times)[:, None])

                # normalize_matrix(kernel_sum)
                # kernel += kerne * self.beta

                # M step - fix the kernel matrix
                out = kernel_time_graphical_lasso(
                    emp_cov,
                    alpha=self.alpha,
                    rho=self.rho,
                    kernel=kernel,
                    n_samples=n_samples,
                    tol=self.tol,
                    rtol=self.rtol,
                    psi=self.psi,
                    max_iter=self.max_iter,
                    verbose=self.verbose,
                    return_n_iter=True,
                    return_history=self.return_history,
                    update_rho_options=self.update_rho_options,
                    compute_objective=self.compute_objective,
                    init=self.precision_,
                )

                if self.return_history:
                    (self.precision_, self.covariance_, self.history_,
                     self.n_iter_) = out
                else:
                    self.precision_, self.covariance_, self.n_iter_ = out
                theta_old = theta
                labels_pred_old = labels_pred
                # kernel = graph_k_means(
                #   list(self.precision_), 3, max_iter=100)
                # self.similarity_matrix = kernel
                # theta_old = kernel
                # if i > 0 and np.linalg.norm(theta_old -
                #                             kernel) / kernel.size < self.eps:
                #     break
            else:
                warnings.warn("theta did not converge.")
            self.similarity_matrix_ = kernel

        else:
            kernel = self.kernel
            if kernel.shape[0] != self.classes_.size:
                raise ValueError(
                    "Kernel size does not match classes of samples, "
                    "got {} classes and kernel has shape {}".format(
                        self.classes_.size, kernel.shape[0]))

            out = kernel_time_graphical_lasso(
                emp_cov,
                alpha=self.alpha,
                rho=self.rho,
                kernel=kernel,
                n_samples=n_samples,
                tol=self.tol,
                rtol=self.rtol,
                psi=self.psi,
                max_iter=self.max_iter,
                verbose=self.verbose,
                return_n_iter=True,
                return_history=self.return_history,
                update_rho_options=self.update_rho_options,
                compute_objective=self.compute_objective,
                init=self.init,
            )
            if self.return_history:
                (self.precision_, self.covariance_, self.history_,
                 self.n_iter_) = out
            else:
                self.precision_, self.covariance_, self.n_iter_ = out

        return self
def latent_time_graphical_lasso(emp_cov,
                                alpha=0.01,
                                tau=1.,
                                rho=1.,
                                beta=1.,
                                eta=1.,
                                max_iter=100,
                                n_samples=None,
                                verbose=False,
                                psi='laplacian',
                                phi='laplacian',
                                mode='admm',
                                tol=1e-4,
                                rtol=1e-4,
                                return_history=False,
                                return_n_iter=True,
                                update_rho_options=None,
                                compute_objective=True,
                                init='empirical'):
    r"""Latent variable time-varying graphical lasso solver.

    Solves the following problem via ADMM:
      min sum_{i=1}^T -n_i log_likelihood(S_i, K_i-L_i) + alpha ||K_i||_{od,1}
          + tau ||L_i||_*
          + beta sum_{i=2}^T Psi(K_i - K_{i-1})
          + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha, tau, beta, eta : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]
    Z_2 = Z_0.copy()[1:]
    W_0 = np.zeros_like(Z_0)
    W_1 = np.zeros_like(Z_1)
    W_2 = np.zeros_like(Z_2)

    X_0 = np.zeros_like(Z_0)
    X_1 = np.zeros_like(Z_1)
    X_2 = np.zeros_like(Z_2)
    U_1 = np.zeros_like(W_1)
    U_2 = np.zeros_like(W_2)

    R_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)
    W_1_old = np.zeros_like(W_1)
    W_2_old = np.zeros_like(W_2)

    # divisor for consensus variables, accounting for two less matrices
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        A += A.transpose(0, 2, 1)
        A /= 2.
        A *= -rho / n_samples[:, None, None]
        A += emp_cov
        # A = emp_cov / rho - A

        R = np.array(
            [prox_logdet(a, lamda=ni / rho) for a, ni in zip(A, n_samples)])

        # update Z_0
        A = R + W_0 + X_0
        A[:-1] += Z_1 - X_1
        A[1:] += Z_2 - X_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # Z_0 = np.array(map(soft_thresholding_, A))
        Z_0 = soft_thresholding(A,
                                lamda=alpha / (rho * divisor[:, None, None]))

        # update Z_1, Z_2
        A_1 = Z_0[:-1] + X_1
        A_2 = Z_0[1:] + X_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * beta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update W_0
        A = Z_0 - R - X_0
        A[:-1] += W_1 - U_1
        A[1:] += W_2 - U_2
        A /= divisor[:, None, None]
        A += A.transpose(0, 2, 1)
        A /= 2.

        W_0 = np.array([
            prox_trace_indicator(a, lamda=tau / (rho * div))
            for a, div in zip(A, divisor)
        ])

        # update W_1, W_2
        A_1 = W_0[:-1] + U_1
        A_2 = W_0[1:] + U_2
        if not phi_node_penalty:
            prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
            W_1 = .5 * (A_1 + A_2 - prox_e)
            W_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            W_1, W_2 = prox_phi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * eta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        X_0 += R - Z_0 + W_0
        X_1 += Z_0[:-1] - Z_1
        X_2 += Z_0[1:] - Z_2
        U_1 += W_0[:-1] - W_1
        U_2 += W_0[1:] - W_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + squared_norm(Z_0[:-1] - Z_1) +
            squared_norm(Z_0[1:] - Z_2) + squared_norm(W_0[:-1] - W_1) +
            squared_norm(W_0[1:] - W_2))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old) + squared_norm(W_1 - W_1_old) +
            squared_norm(W_2 - W_2_old))

        obj = objective(emp_cov, n_samples, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
                        alpha, tau, beta, eta, psi, phi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(R) + squared_norm(Z_1) + squared_norm(Z_2) +
                    squared_norm(W_1) + squared_norm(W_2)),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + squared_norm(Z_0[:-1]) +
                    squared_norm(Z_0[1:]) + squared_norm(W_0[:-1]) +
                    squared_norm(W_0[1:]))),
            e_dual=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * rho *
            (np.sqrt(
                squared_norm(X_0) + squared_norm(X_1) + squared_norm(X_2) +
                squared_norm(U_1) + squared_norm(U_2))))

        R_old = R.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()
        W_1_old = W_1.copy()
        W_2_old = W_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        X_1 *= rho / rho_new
        X_2 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, W_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
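The following is a minimal, illustrative sketch of calling latent_time_graphical_lasso as defined above; the shapes and hyper-parameter values are assumptions, and the regain helpers the solver uses are presumed importable.

import numpy as np
from sklearn.covariance import empirical_covariance

n_times, n_per_time, n_features = 4, 100, 8
rng = np.random.RandomState(42)
emp_cov = np.array([
    empirical_covariance(rng.randn(n_per_time, n_features))
    for _ in range(n_times)
])

K, L, covariance, n_iter = latent_time_graphical_lasso(
    emp_cov,
    alpha=0.1,  # sparsity of each precision matrix K_i
    tau=1.0,    # nuclear-norm penalty on the latent components L_i
    beta=1.0,   # temporal smoothness of K
    eta=1.0,    # temporal smoothness of L
    n_samples=np.full(n_times, n_per_time),
)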
def equality_time_graphical_lasso(
        S,
        K_init,
        max_iter,
        loss,
        C,
        rho,  # n_samples=None, 
        psi,
        gamma,
        tol,
        rtol,
        verbose,
        return_history,
        return_n_iter,
        mode,
        compute_objective,
        stop_at,
        stop_when,
        update_rho_options,
        init):
    """Equality constrained time-varying graphical LASSO solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T theta ||K_i||_{od,1}
            + (1 - theta) sum_{i=2}^T Psi(K_i - K_{i-1})
        s.t. loss(S_i, K_i) = c_i for i = 1, ..., T

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    S : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    gamma: float, optional
        Kernel parameter when psi is chosen to be 'kernel'.
    C : float or ndarray, shape (n_times,)
        Log-likelihood constraints for each K_i.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    psi_name = psi.__name__

    if loss == 'LL':
        loss_function = neg_logl
    else:
        loss_function = dtrace

    K = K_init
    Z_0 = K.copy()
    Z_1 = K.copy()[:-1]
    Z_2 = K.copy()[1:]

    u = np.zeros((S.shape[0]))
    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    I = np.eye(S.shape[1])

    checks = [
        convergence(
            obj=equality_objective(loss_function, S, K, C, Z_0, Z_1, Z_2, psi))
    ]

    for iteration_ in range(max_iter):
        # update K
        A_K = U_0 - Z_0
        A_K[:-1] += Z_1 - U_1
        A_K[1:] += Z_2 - U_2
        A_K += A_K.transpose(0, 2, 1)
        A_K /= 2.

        K = soft_thresholding_od(A_K, lamda=1. / rho)

        # update Z_0
        residual_loss_constraint_u = loss_gen(loss_function, S, Z_0) - C + u

        A_Z = K + U_0
        A_Z += A_Z.transpose(0, 2, 1)
        A_Z /= 2.

        if loss_function == neg_logl:
            A_Z -= residual_loss_constraint_u[:, None, None] * S
            Z_0 = np.array([
                prox_logdet_constrained(_A, _a, I)
                for _A, _a in zip(A_Z, residual_loss_constraint_u)
            ])
        elif loss_function == dtrace:
            Z_0 = np.array([
                prox_dtrace_constrained(_A, _S, _a, I)
                for _A, _S, _a in zip(A_Z, S, residual_loss_constraint_u)
            ])

        # other Zs
        A_1 = K[:-1] + U_1
        A_2 = K[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        residual_loss_constraint = loss_gen(loss_function, S, Z_0) - C
        u += residual_loss_constraint
        U_0 += K - Z_0
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        print(residual_loss_constraint)

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            np.sum(residual_loss_constraint**2) + squared_norm(K - Z_0) +
            squared_norm(K[:-1] - Z_1) + squared_norm(K[1:] - Z_2))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old))

        obj = equality_objective(loss_function, S, K, C, Z_0, Z_1, Z_2,
                                 psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(Z_0.size + 2 * Z_1.size + S.shape[0]) * tol +
            rtol * max(
                np.sqrt(
                    np.sum(C**2) + squared_norm(Z_0) + squared_norm(Z_1) +
                    squared_norm(Z_2)),
                np.sqrt(
                    np.sum(
                        (residual_loss_constraint + C)**2) + squared_norm(K) +
                    squared_norm(K[:-1]) + squared_norm(K[1:]))),
            e_dual=np.sqrt(Z_0.size + 2 * Z_1.size) * tol + rtol * rho *
            np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
        )
        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u *= rho / rho_new
        U_0 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new

        #assert is_pos_def(Z_0)
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in K])
    return_list = [K, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
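A hedged usage sketch of the equality-constrained solver above: since every argument in the signature is positional, they are spelled out as keywords here, and both the warm start K_init and the constraint values in C are purely illustrative. The helpers it calls (loss_gen, equality_objective, prox_logdet_constrained, etc.) are assumed to be importable.

import numpy as np
from sklearn.covariance import empirical_covariance

n_times, n_per_time, n_features = 3, 200, 6
rng = np.random.RandomState(0)
S = np.array([
    empirical_covariance(rng.randn(n_per_time, n_features))
    for _ in range(n_times)
])

K_init = np.array([np.eye(n_features)] * n_times)  # illustrative warm start
C = np.full(n_times, 10.0)                         # illustrative constraints

K, covariance, n_iter = equality_time_graphical_lasso(
    S, K_init, max_iter=500, loss='LL', C=C, rho=1., psi='laplacian',
    gamma=1., tol=1e-4, rtol=1e-4, verbose=False, return_history=False,
    return_n_iter=True, mode='admm', compute_objective=True, stop_at=None,
    stop_when=1e-4, update_rho_options=None, init='empirical')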
def kernel_latent_time_graphical_lasso(
    emp_cov,
    alpha=0.01,
    tau=1.0,
    rho=1.0,
    kernel_psi=None,
    kernel_phi=None,
    max_iter=100,
    verbose=False,
    psi="laplacian",
    phi="laplacian",
    mode="admm",
    tol=1e-4,
    rtol=1e-4,
    assume_centered=False,
    n_samples=None,
    return_history=False,
    return_n_iter=True,
    update_rho_options=None,
    compute_objective=True,
    init="empirical",
):
    r"""Time-varying latent variable graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(K_i-L_i) + alpha ||K_i||_{od,1}
            + tau ||L_i||_*
            + sum_{s>t}^T k_psi(s,t) Psi(K_s - K_t)
            + sum_{s>t}^T k_phi(s,t) Phi(L_s - L_t)

    where S is the empirical covariance of the data
    matrix D (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha, tau : float, optional
        Regularisation parameters.
    kernel_psi, kernel_phi : ndarray, shape (n_times, n_times), optional
        Temporal similarity matrices weighting the Psi and Phi penalties
        between pairs of time points. Default to the identity (no coupling).
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)
    n_times, _, n_features = emp_cov.shape

    if kernel_psi is None:
        kernel_psi = np.eye(n_times)
    if kernel_phi is None:
        kernel_phi = np.eye(n_times)

    Z_0 = init_precision(emp_cov, mode=init)
    W_0 = np.zeros_like(Z_0)
    X_0 = np.zeros_like(Z_0)
    R_old = np.zeros_like(Z_0)

    Z_M, Z_M_old = {}, {}
    Y_M = {}
    W_M, W_M_old = {}, {}
    U_M = {}
    for m in range(1, n_times):
        Z_L = Z_0.copy()[:-m]
        Z_R = Z_0.copy()[m:]
        Z_M[m] = (Z_L, Z_R)

        W_L = np.zeros_like(Z_L)
        W_R = np.zeros_like(Z_R)
        W_M[m] = (W_L, W_R)

        Y_L = np.zeros_like(Z_L)
        Y_R = np.zeros_like(Z_R)
        Y_M[m] = (Y_L, Y_R)

        U_L = np.zeros_like(W_L)
        U_R = np.zeros_like(W_R)
        U_M[m] = (U_L, U_R)

        Z_L_old = np.zeros_like(Z_L)
        Z_R_old = np.zeros_like(Z_R)
        Z_M_old[m] = (Z_L_old, Z_R_old)

        W_L_old = np.zeros_like(W_L)
        W_R_old = np.zeros_like(W_R)
        W_M_old[m] = (W_L_old, W_R_old)

    if n_samples is None:
        n_samples = np.ones(n_times)

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        A *= -rho / n_samples[:, None, None]
        A += emp_cov
        # A = emp_cov / rho - A

        R = np.array(
            [prox_logdet(a, lamda=ni / rho) for a, ni in zip(A, n_samples)])

        # update Z_0
        A = R + W_0 + X_0
        for m in range(1, n_times):
            A[:-m] += Z_M[m][0] - Y_M[m][0]
            A[m:] += Z_M[m][1] - Y_M[m][1]

        A /= n_times
        Z_0 = soft_thresholding(A, lamda=alpha / (rho * n_times))

        # update W_0
        A = Z_0 - R - X_0
        for m in range(1, n_times):
            A[:-m] += W_M[m][0] - U_M[m][0]
            A[m:] += W_M[m][1] - U_M[m][1]

        A /= n_times
        A += A.transpose(0, 2, 1)
        A /= 2.0

        W_0 = np.array(
            [prox_trace_indicator(a, lamda=tau / (rho * n_times)) for a in A])

        # update residuals
        X_0 += R - Z_0 + W_0

        for m in range(1, n_times):
            # other Zs
            Y_L, Y_R = Y_M[m]
            A_L = Z_0[:-m] + Y_L
            A_R = Z_0[m:] + Y_R
            if not psi_node_penalty:
                prox_e = prox_psi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel_psi, m)[:, None, None] / rho)
                Z_L = 0.5 * (A_L + A_R - prox_e)
                Z_R = 0.5 * (A_L + A_R + prox_e)
            else:
                Z_L, Z_R = prox_psi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel_psi, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            Z_M[m] = (Z_L, Z_R)

            # update other residuals
            Y_L += Z_0[:-m] - Z_L
            Y_R += Z_0[m:] - Z_R

            # other Ws
            U_L, U_R = U_M[m]
            A_L = W_0[:-m] + U_L
            A_R = W_0[m:] + U_R
            if not phi_node_penalty:
                prox_e = prox_phi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel_phi, m)[:, None, None] / rho)
                W_L = 0.5 * (A_L + A_R - prox_e)
                W_R = 0.5 * (A_L + A_R + prox_e)
            else:
                W_L, W_R = prox_phi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel_phi, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            W_M[m] = (W_L, W_R)

            # update other residuals
            U_L += W_0[:-m] - W_L
            U_R += W_0[m:] - W_R

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + sum(
                squared_norm(Z_0[:-m] - Z_M[m][0]) +
                squared_norm(Z_0[m:] - Z_M[m][1]) +
                squared_norm(W_0[:-m] - W_M[m][0]) +
                squared_norm(W_0[m:] - W_M[m][1]) for m in range(1, n_times)))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + sum(
                squared_norm(Z_M[m][0] - Z_M_old[m][0]) +
                squared_norm(Z_M[m][1] - Z_M_old[m][1]) +
                squared_norm(W_M[m][0] - W_M_old[m][0]) +
                squared_norm(W_M[m][1] - W_M_old[m][1])
                for m in range(1, n_times)))

        obj = (objective(emp_cov, n_samples, R, Z_0, Z_M, W_0, W_M, alpha, tau,
                         kernel_psi, kernel_phi, psi, phi)
               if compute_objective else np.nan)

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=n_features * np.sqrt(n_times * (2 * n_times - 1)) * tol +
            rtol * max(
                np.sqrt(
                    squared_norm(R) + sum(
                        squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1]) +
                        squared_norm(W_M[m][0]) + squared_norm(W_M[m][1])
                        for m in range(1, n_times))),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + sum(
                        squared_norm(Z_0[:-m]) + squared_norm(Z_0[m:]) +
                        squared_norm(W_0[:-m]) + squared_norm(W_0[m:])
                        for m in range(1, n_times))),
            ),
            e_dual=n_features * np.sqrt(n_times * (2 * n_times - 1)) * tol +
            rtol * rho * np.sqrt(
                squared_norm(X_0) + sum(
                    squared_norm(Y_M[m][0]) + squared_norm(Y_M[m][1]) +
                    squared_norm(U_M[m][0]) + squared_norm(U_M[m][1])
                    for m in range(1, n_times))),
        )

        R_old = R.copy()
        for m in range(1, n_times):
            Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())
            W_M_old[m] = (W_M[m][0].copy(), W_M[m][1].copy())

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        for m in range(1, n_times):
            Y_L, Y_R = Y_M[m]
            Y_L *= rho / rho_new
            Y_R *= rho / rho_new

            U_L, U_R = U_M[m]
            U_L *= rho / rho_new
            U_R *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, W_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
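Below, a minimal illustrative call to kernel_latent_time_graphical_lasso with RBF kernels over the time indices for both the precision and the latent parts; shapes, length scales and the availability of the regain helpers it depends on are all assumptions.

import numpy as np
from sklearn.covariance import empirical_covariance
from sklearn.gaussian_process import kernels

n_times, n_per_time, n_features = 5, 80, 10
rng = np.random.RandomState(1)
emp_cov = np.array([
    empirical_covariance(rng.randn(n_per_time, n_features))
    for _ in range(n_times)
])

times = np.arange(n_times)[:, None]
kernel_psi = kernels.RBF(length_scale=2.0)(times)  # smoothness of K over time
kernel_phi = kernels.RBF(length_scale=5.0)(times)  # smoothness of L over time

K, L, covariance, n_iter = kernel_latent_time_graphical_lasso(
    emp_cov,
    alpha=0.1,
    tau=1.0,
    kernel_psi=kernel_psi,
    kernel_phi=kernel_phi,
    n_samples=np.full(n_times, n_per_time),
)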
Example no. 16
def inequality_time_graphical_lasso(S, K_init, max_iter, loss, C, theta,
                                    c_prox, rho, div, psi, gamma, tol, rtol,
                                    verbose, return_history, return_n_iter,
                                    mode, compute_objective, stop_at,
                                    stop_when, update_rho_options, init):
    """Inequality constrained time-varying graphical LASSO solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T theta ||K_i||_{od,1}
            + (1 - theta) sum_{i=2}^T Psi(K_i - K_{i-1})
        s.t. loss(S_i, K_i) <= c_i for i = 1, ..., T

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    S : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    theta : float, optional
        Trade-off between the sparsity penalty and the temporal penalty.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    gamma: float, optional
        Kernel parameter when psi is chosen to be 'kernel'.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    psi_name = psi.__name__

    if loss == 'LL':
        loss_function = neg_logl
    else:
        loss_function = dtrace

    Z_0 = K_init  # init_precision(S, mode=init)
    Z_1 = Z_0.copy()[:-1]
    Z_2 = Z_0.copy()[1:]

    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    # divisor for consensus variables, accounting for one less matrix for t = 0 and t = T
    divisor = np.full(S.shape[0], 2, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    out_obj = []

    checks = [convergence(obj=penalty_objective(Z_0, Z_1, Z_2, psi, theta))]

    for iteration_ in range(max_iter):
        A_K_pen = np.zeros_like(Z_0)
        A_K_pen[:-1] += Z_1 - U_1
        A_K_pen[1:] += Z_2 - U_2
        A_K_pen += A_K_pen.transpose(0, 2, 1)
        A_K_pen /= 2.

        Z_0 = soft_thresholding_od(A_K_pen / divisor[:, None, None],
                                   lamda=theta / (rho * divisor))

        # check feasibility and perform line search if necessary
        losses_all = loss_gen(loss_function, S, Z_0)
        feasibility_check = losses_all > C
        infeasible_indices = list(
            compress(range(len(feasibility_check)), feasibility_check))

        for i in infeasible_indices:
            if c_prox == 'cvx':
                Z_0[i], loss_i = prox_cvx(loss_function, S[i], Z_0[i],
                                          Z_0_old[i], C[i], div)
            elif c_prox == 'grad':
                Z_0[i], loss_i = prox_grad(loss_function, S[i], Z_0[i],
                                           Z_0_old[i], C[i], 0.)

        # break if losses post-correction blow up
        losses_all_new = loss_gen(loss_function, S, Z_0)
        if np.inf in losses_all_new:
            print(iteration_, 'Inf')
            covariance_ = np.array([linalg.pinvh(x) for x in Z_0_old])
            return_list = [Z_0_old, covariance_]
            if return_history:
                return_list.append(checks)
            if return_n_iter:
                return_list.append(iteration_)
            return return_list

        # other Zs
        A_1 = Z_0[:-1] + U_1
        A_2 = Z_0[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * (1 - theta) / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * (1 - theta) / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        U_1 += Z_0[:-1] - Z_1
        U_2 += Z_0[1:] - Z_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(Z_0[:-1] - Z_1) + squared_norm(Z_0[1:] - Z_2))
        snorm = rho * np.sqrt(
            squared_norm(Z_1 - Z_1_old) + squared_norm(Z_2 - Z_2_old))

        obj = penalty_objective(Z_0, Z_1, Z_2, psi, theta)

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(losses_all_new.size + 2 * Z_1.size) * tol + rtol *
            (max(np.sqrt(squared_norm(losses_all_new)), np.sqrt(
                squared_norm(C))) +
             max(np.sqrt(squared_norm(Z_1)), np.sqrt(squared_norm(Z_0[:-1]))) +
             max(np.sqrt(squared_norm(Z_2)), np.sqrt(squared_norm(Z_0[1:])))),
            e_dual=np.sqrt(2 * Z_1.size) * tol +
            rtol * rho * np.sqrt(squared_norm(U_1) + squared_norm(U_2)))

        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        out_obj.append(penalty_objective(Z_0, Z_0[:-1], Z_0[1:], psi, theta))
        checks.append(check)

        # if len(out_obj) > 100 and c_prox == 'grad':
        #     if (np.mean(out_obj[-11:-1]) - np.mean(out_obj[-10:])) < stop_when:
        #         print('obj break')
        #         break

        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        # rho_new = update_rho(
        #     rho, rnorm, snorm, iteration=iteration_,
        #     mu=1e2, tau_inc=1.01, tau_dec=1.01)
        #     # **(update_rho_options or {}))
        # # scaled dual variables should be also rescaled
        # U_1 *= rho / rho_new
        # U_2 *= rho / rho_new
        # rho = rho_new

    else:
        warnings.warn("Objective did not converge.")

    print(iteration_, out_obj[-1])
    # print(out_obj)
    print(check.rnorm, check.e_pri)
    print(check.snorm, check.e_dual)

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
Example no. 17
def time_latent_graph_lasso(emp_cov,
                            alpha=1.,
                            tau=1.,
                            rho=1.,
                            beta=1.,
                            eta=1.,
                            max_iter=1000,
                            verbose=False,
                            psi='laplacian',
                            phi='laplacian',
                            assume_centered=False,
                            tol=1e-4,
                            rtol=1e-2,
                            return_history=False,
                            return_n_iter=True,
                            mode=None):
    r"""Time-varying latent variable graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(K_i-L_i) + alpha ||K_i||_{od,1}
            + tau ||L_i||_*
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})
            + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S is the empirical covariance of the data
    matrix D (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha, tau, beta, eta : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, _ = check_norm_prox(psi)
    phi, prox_phi, _ = check_norm_prox(phi)

    # emp_cov = np.array([empirical_covariance(
    #     x, assume_centered=assume_centered) for x in data_list])

    # weight each time point equally (no per-time sample counts available)
    n_samples = np.ones(emp_cov.shape[0])

    K = np.zeros_like(emp_cov)
    L = np.zeros_like(emp_cov)
    X = np.zeros_like(emp_cov)
    Z_0 = np.zeros_like(emp_cov)
    Z_1 = np.zeros_like(emp_cov)[:-1]
    Z_2 = np.zeros_like(emp_cov)[1:]
    W_0 = np.zeros_like(emp_cov)
    W_1 = np.zeros_like(emp_cov)[:-1]
    W_2 = np.zeros_like(emp_cov)[1:]
    U_0 = np.zeros_like(emp_cov)
    U_1 = np.zeros_like(emp_cov)[:-1]
    U_2 = np.zeros_like(emp_cov)[1:]
    Y_0 = np.zeros_like(emp_cov)
    Y_1 = np.zeros_like(emp_cov)[:-1]
    Y_2 = np.zeros_like(emp_cov)[1:]

    U_consensus = np.zeros_like(emp_cov)
    Y_consensus = np.zeros_like(emp_cov)
    Z_consensus = np.zeros_like(emp_cov)
    Z_consensus_old = np.zeros_like(emp_cov)
    W_consensus = np.zeros_like(emp_cov)
    W_consensus_old = np.zeros_like(emp_cov)
    R_old = np.zeros_like(emp_cov)

    # divisor for consensus variables, accounting for two less matrices
    divisor = np.zeros(emp_cov.shape[0]) + 3
    divisor[0] -= 1
    divisor[-1] -= 1
    # eta = np.divide(n_samples, divisor * rho)

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = K - L - X
        # A += np.array(map(np.transpose, A))
        # A /= 2.
        A *= -rho / n_samples[:, None, None]
        A += emp_cov
        R = np.array([prox_logdet(a, lamda=ni / rho)
                      for a, ni in zip(A, n_samples)])

        # update K, L
        K = L + R + X + Z_0 - U_0
        K[:-1] += Z_1 - U_1
        K[1:] += Z_2 - U_2
        K /= divisor[:, None, None] + 1

        L = K - R - X + W_0 - Y_0
        L[:-1] += W_1 - Y_1
        L[1:] += W_2 - Y_2
        L /= divisor[:, None, None] + 1

        # update Z_0
        # Zold = Z
        # X_hat = alpha * X + (1 - alpha) * Zold
        soft_thresholding = partial(soft_thresholding_sign, lamda=alpha / rho)
        Z_0 = np.array(list(map(soft_thresholding, K + U_0)))

        # update Z_1, Z_2
        A_1 = K[:-1] + U_1
        A_2 = K[1:] + U_2
        prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
        Z_1 = .5 * (A_1 + A_2 - prox_e)
        Z_2 = .5 * (A_1 + A_2 + prox_e)

        # update W_0
        A = L + Y_0
        W_0 = np.array(
            [prox_trace_indicator(a, lamda=tau / rho) for a in A])

        # update W_1, W_2
        A_1 = L[:-1] + Y_1
        A_2 = L[1:] + Y_2
        prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
        W_1 = .5 * (A_1 + A_2 - prox_e)
        W_2 = .5 * (A_1 + A_2 + prox_e)

        # update residuals
        X += R - K + L

        U_0 += (K - Z_0)
        U_1 += (K[:-1] - Z_1)
        U_2 += (K[1:] - Z_2)

        Y_0 += (L - W_0)
        Y_1 += (L[:-1] - W_1)
        Y_2 += (L[1:] - W_2)

        # diagnostics, reporting, termination checks
        Z_consensus = Z_0.copy()
        Z_consensus[:-1] += Z_1
        Z_consensus[1:] += Z_2
        Z_consensus /= divisor[:, None, None]

        U_consensus = U_0.copy()
        U_consensus[:-1] += U_1
        U_consensus[1:] += U_2
        U_consensus /= divisor[:, None, None]

        W_consensus = W_0.copy()
        W_consensus[:-1] += W_1
        W_consensus[1:] += W_2
        W_consensus /= divisor[:, None, None]

        Y_consensus = Y_0.copy()
        Y_consensus[:-1] += Y_1
        Y_consensus[1:] += Y_2
        Y_consensus /= divisor[:, None, None]

        check = convergence(
            obj=objective(n_samples, emp_cov, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
                          alpha, tau, beta, eta, psi, phi),
            rnorm=np.sqrt(
                squared_norm(K - Z_consensus) + squared_norm(L - W_consensus) +
                squared_norm(K - L - R)),
            snorm=np.sqrt(
                squared_norm(rho * (Z_consensus - Z_consensus_old)) +
                squared_norm(rho * (W_consensus - W_consensus_old)) +
                squared_norm(rho * (R - R_old))),
            e_pri=np.sqrt(np.prod(K.shape) * 3) * tol + rtol * max(
                np.sqrt(
                    squared_norm(K) + squared_norm(L) + squared_norm(K - L)),
                np.sqrt(
                    squared_norm(Z_consensus) + squared_norm(W_consensus) +
                    squared_norm(R))),
            e_dual=np.sqrt(np.prod(K.shape) * 3) * tol + rtol * np.sqrt(
                squared_norm(rho * (U_consensus)) +
                squared_norm(rho * (Y_consensus)) + squared_norm(rho * (X))))
        Z_consensus_old = Z_consensus.copy()
        W_consensus_old = W_consensus.copy()
        R_old = R.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break
    else:
        warnings.warn("Objective did not converge.")

    return_list = [K, L, emp_cov]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
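Finally, a minimal sketch of calling the older time_latent_graph_lasso variant above; hyper-parameters and shapes are illustrative, and the prox/penalty helpers it uses are presumed importable.

import numpy as np
from sklearn.covariance import empirical_covariance

n_times, n_per_time, n_features = 4, 60, 8
rng = np.random.RandomState(7)
emp_cov = np.array([
    empirical_covariance(rng.randn(n_per_time, n_features))
    for _ in range(n_times)
])

K, L, S, n_iter = time_latent_graph_lasso(
    emp_cov, alpha=0.5, tau=1., beta=1., eta=1., max_iter=500)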