Example #1
def tgl_forward_backward(
    emp_cov, alpha=0.01, beta=1., max_iter=100, n_samples=None, verbose=False,
    tol=1e-4, delta=1e-4, gamma=1., lamda=1., eps=0.5, debug=False,
    return_history=False, return_n_iter=True, choose='gamma',
    lamda_criterion='b', time_norm=1, compute_objective=True,
    return_n_linesearch=False, vareps=1e-5, stop_at=None, stop_when=1e-4,
        laplacian_penalty=False, init='empirical'):
    """Time-varying graphical lasso solver with forward-backward splitting.

    Solves the following problem via FBS:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha*||K_i||_{od,1}
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data.
    alpha, beta : float, optional
        Regularisation parameters.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    verbose : bool, default False
        Print info at each iteration.
    tol : float, optional
        Absolute tolerance for convergence.
    delta, gamma, lamda, eps : float, optional
        FBS parameters.
    debug : bool, default False
        Run in debug mode.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    choose : ('gamma', 'lamda', 'fixed', 'both')
        Search iteratively for gamma / lamda / neither / both.
    lamda_criterion : ('a', 'b', 'c')
        Criterion to choose lamda. See ref for details.
    time_norm : float, optional
        Choose the temporal norm between points.
    compute_objective : bool, default True
        Whether to compute the objective value at each iteration.
    return_n_linesearch : bool, optional
        Return the number of line-search iterations before convergence.
    vareps : float, optional
        Jitter for the loss.
    stop_at, stop_when : float, optional
        Other convergence criteria, as used in the paper.
    laplacian_penalty : bool, default False
        Use Laplacian penalty.
    init : {'empirical', 'zeros', ndarray}
        How to initialize the precision matrix: with the inverse of the
        empirical covariance ('empirical'), with a zero matrix ('zeros'), or
        with a precomputed ndarray.

    Returns
    -------
    K, covariance : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    available_choose = ('gamma', 'lamda', 'fixed', 'both')
    if choose not in available_choose:
        raise ValueError(
            "`choose` parameter must be one of %s." % available_choose)

    n_times, _, n_features = emp_cov.shape
    K = init_precision(emp_cov, mode=init)

    if laplacian_penalty:
        obj_partial = partial(
            objective_laplacian, n_samples=n_samples, emp_cov=emp_cov,
            alpha=alpha, beta=beta, vareps=vareps)
        function_f = partial(
            loss_laplacian, beta=beta, n_samples=n_samples, S=emp_cov,
            vareps=vareps)
        gradient_f = partial(
            grad_loss_laplacian, emp_cov=emp_cov, beta=beta,
            n_samples=n_samples, vareps=vareps)
        function_g = partial(penalty_laplacian, alpha=alpha)
    else:
        psi = partial(vector_p_norm, p=time_norm)
        obj_partial = partial(
            objective, n_samples=n_samples, emp_cov=emp_cov, alpha=alpha,
            beta=beta, psi=psi, vareps=vareps)
        function_f = partial(
            loss, n_samples=n_samples, S=emp_cov, vareps=vareps)
        gradient_f = partial(
            grad_loss, emp_cov=emp_cov, n_samples=n_samples, vareps=vareps)
        function_g = partial(penalty, alpha=alpha, beta=beta, psi=psi)

    max_residual = -np.inf
    n_linesearch = 0
    checks = [convergence(obj=obj_partial(precision=K))]
    for iteration_ in range(max_iter):
        k_previous = K.copy()
        x_inv = np.array([linalg.pinvh(x) for x in K])
        grad = gradient_f(K, x_inv=x_inv)

        if choose in ['gamma', 'both']:
            gamma, y = choose_gamma(
                gamma / eps if iteration_ > 0 else gamma, K,
                function_f=function_f, beta=beta, alpha=alpha, lamda=lamda,
                grad=grad, delta=delta, eps=eps, max_iter=200, p=time_norm,
                x_inv=x_inv, choose=choose,
                laplacian_penalty=laplacian_penalty)

        x_hat = K - gamma * grad
        if choose not in ['gamma', 'both']:
            if laplacian_penalty:
                y = soft_thresholding_od(x_hat, alpha * gamma)
            else:
                y = prox_FL(
                    x_hat, beta * gamma, alpha * gamma, p=time_norm,
                    symmetric=True)

        if choose in ('lamda', 'both'):
            lamda, n_ls = choose_lamda(
                min(lamda / eps if iteration_ > 0 else lamda,
                    1), K, function_f=function_f, objective_f=obj_partial,
                gradient_f=gradient_f, function_g=function_g, gamma=gamma,
                delta=delta, eps=eps, criterion=lamda_criterion, max_iter=200,
                p=time_norm, grad=grad, prox=y, vareps=vareps)
            n_linesearch += n_ls

        K = K + min(max(lamda, 0), 1) * (y - K)
        # K, t = fista_step(Y, Y - Y_old, t)

        check = convergence(
            obj=obj_partial(precision=K),
            rnorm=np.linalg.norm(upper_diag_3d(K) - upper_diag_3d(k_previous)),
            snorm=np.linalg.norm(
                obj_partial(precision=K) - obj_partial(precision=k_previous)),
            e_pri=np.sqrt(upper_diag_3d(K).size) * tol + tol * max(
                np.linalg.norm(upper_diag_3d(K)),
                np.linalg.norm(upper_diag_3d(k_previous))), e_dual=tol)

        if verbose and iteration_ % (50 if verbose < 2 else 1) == 0:
            print(
                "obj: %.4f, rnorm: %.7f, snorm: %.4f,"
                "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        if return_history:
            checks.append(check)

        if np.isnan(check.rnorm) or np.isnan(check.snorm):
            warnings.warn("precision is not positive definite.")

        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break
        else:
            # use this convergence criterion
            subgrad = (x_hat - K) / gamma
            if 0:
                if laplacian_penalty:
                    grad = grad_loss_laplacian(
                        K, emp_cov, n_samples, vareps=vareps)
                else:
                    grad = grad_loss(K, emp_cov, n_samples, vareps=vareps)
                res_norm = np.linalg.norm(grad + subgrad)

                if iteration_ == 0:
                    normalizer = res_norm + 1e-6
                max_residual = max(
                    np.linalg.norm(grad), np.linalg.norm(subgrad)) + 1e-6
            else:
                res_norm = np.linalg.norm(K - k_previous) / gamma
                max_residual = max(max_residual, res_norm)
                normalizer = max(
                    np.linalg.norm(grad), np.linalg.norm(subgrad)) + 1e-6

            r_rel = res_norm / max_residual
            r_norm = res_norm / normalizer

            if not debug and (r_rel <= tol
                              or r_norm <= tol) and iteration_ > 0:  # or (
                # check.rnorm <= check.e_pri and iteration_ > 0):
                break
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(k) for k in K])
    return_list = [K, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    if return_n_linesearch:
        return_list.append(n_linesearch)
    return return_list
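
A minimal usage sketch for the solver above on synthetic data (not part of the original module). It assumes tgl_forward_backward and its helpers are importable from here; the regularisation values are illustrative only.

import numpy as np


def _demo_tgl_forward_backward(n_times=4, samples_per_time=50, n_features=10, seed=0):
    rng = np.random.RandomState(seed)
    X = rng.randn(n_times, samples_per_time, n_features)
    # per-time empirical covariances, shape (n_times, n_features, n_features)
    emp_cov = np.array([x.T.dot(x) / x.shape[0] for x in X])
    n_samples = np.full(n_times, samples_per_time)
    # with the default return flags the solver returns [K, covariance, n_iter]
    K, covariance, n_iter = tgl_forward_backward(
        emp_cov, alpha=0.1, beta=0.5, n_samples=n_samples, max_iter=50)
    return K, covariance, n_iter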
def kernel_time_graphical_lasso(
    emp_cov,
    alpha=0.01,
    rho=1,
    kernel=None,
    max_iter=100,
    n_samples=None,
    verbose=False,
    psi="laplacian",
    tol=1e-4,
    rtol=1e-4,
    return_history=False,
    return_n_iter=True,
    mode="admm",
    update_rho_options=None,
    compute_objective=True,
    stop_at=None,
    stop_when=1e-4,
    init="empirical",
):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(K_i-L_i) + alpha ||K_i||_{od,1}
            + sum_{s>t}^T k_psi(s,t) Psi(K_s - K_t)

    where S is the empirical covariance of the data
    matrix D (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha : float, optional
        Regularisation parameter.
    kernel : ndarray, shape (n_times, n_times), optional
        Temporal similarity matrix weighting the Psi penalties; defaults to
        the identity (no coupling between time points).
    n_samples : ndarray, optional
        Number of samples available for each time point; defaults to ones.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K, covariance : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    n_times, _, n_features = emp_cov.shape

    if kernel is None:
        kernel = np.eye(n_times)

    Z_0 = init_precision(emp_cov, mode=init)
    U_0 = np.zeros_like(Z_0)
    Z_0_old = np.zeros_like(Z_0)

    Z_M, Z_M_old = {}, {}
    U_M = {}
    for m in range(1, n_times):
        # all possible Markovian jumps (temporal lags m = 1, ..., n_times - 1)
        Z_L = Z_0.copy()[:-m]
        Z_R = Z_0.copy()[m:]
        Z_M[m] = (Z_L, Z_R)

        U_L = np.zeros_like(Z_L)
        U_R = np.zeros_like(Z_R)
        U_M[m] = (U_L, U_R)

        Z_L_old = np.zeros_like(Z_L)
        Z_R_old = np.zeros_like(Z_R)
        Z_M_old[m] = (Z_L_old, Z_R_old)

    if n_samples is None:
        n_samples = np.ones(n_times)

    checks = [
        convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_M, alpha,
                                  kernel, psi))
    ]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        for m in range(1, n_times):
            A[:-m] += Z_M[m][0] - U_M[m][0]
            A[m:] += Z_M[m][1] - U_M[m][1]

        A /= n_times
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # K = np.array(map(soft_thresholding_, A))
        A += A.transpose(0, 2, 1)
        A /= 2.0

        A *= -rho * n_times / n_samples[:, None, None]
        A += emp_cov

        K = np.array([
            prox_logdet(a, lamda=ni / (rho * n_times))
            for a, ni in zip(A, n_samples)
        ])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        Z_0 = soft_thresholding(A, lamda=alpha / rho)

        # update residuals
        U_0 += K - Z_0

        # other Zs
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            A_L = K[:-m] + U_L
            A_R = K[m:] + U_R
            if not psi_node_penalty:
                prox_e = prox_psi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel, m)[:, None, None] / rho)
                Z_L = 0.5 * (A_L + A_R - prox_e)
                Z_R = 0.5 * (A_L + A_R + prox_e)
            else:
                Z_L, Z_R = prox_psi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            Z_M[m] = (Z_L, Z_R)

            # update other residuals
            U_L += K[:-m] - Z_L
            U_R += K[m:] - Z_R

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + sum(
                squared_norm(K[:-m] - Z_M[m][0]) +
                squared_norm(K[m:] - Z_M[m][1]) for m in range(1, n_times)))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + sum(
                squared_norm(Z_M[m][0] - Z_M_old[m][0]) +
                squared_norm(Z_M[m][1] - Z_M_old[m][1])
                for m in range(1, n_times)))

        obj = objective(n_samples, emp_cov, Z_0, K, Z_M, alpha, kernel,
                        psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=n_features * n_times * tol + rtol * max(
                np.sqrt(
                    squared_norm(Z_0) + sum(
                        squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1])
                        for m in range(1, n_times))),
                np.sqrt(
                    squared_norm(K) + sum(
                        squared_norm(K[:-m]) + squared_norm(K[m:])
                        for m in range(1, n_times))),
            ),
            e_dual=n_features * n_times * tol + rtol * rho * np.sqrt(
                squared_norm(U_0) + sum(
                    squared_norm(U_M[m][0]) + squared_norm(U_M[m][1])
                    for m in range(1, n_times))),
        )
        Z_0_old = Z_0.copy()
        for m in range(1, n_times):
            Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            U_L *= rho / rho_new
            U_R *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
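
A hedged usage sketch for kernel_time_graphical_lasso with an explicit temporal kernel. The squared-exponential weighting below is an illustrative choice; any symmetric (n_times, n_times) similarity matrix works.

import numpy as np


def _demo_kernel_time_graphical_lasso(emp_cov, n_samples=None, bandwidth=2.0):
    n_times = emp_cov.shape[0]
    t = np.arange(n_times)
    # squared-exponential similarity between time points (illustrative)
    kernel = np.exp(-(t[:, None] - t[None, :]) ** 2 / (2.0 * bandwidth ** 2))
    # with the default return flags the solver returns [K, covariance, n_iter]
    K, covariance, n_iter = kernel_time_graphical_lasso(
        emp_cov, alpha=0.1, kernel=kernel, n_samples=n_samples, psi="laplacian")
    return K, covariance, n_iter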
    def _fit(self, emp_cov, n_samples):
        if self.kernel is None:
            # from scipy.optimize import minimize
            # discover best kernel parameter via EM
            # initialise precision matrices, as warm start
            self.precision_ = init_precision(emp_cov, mode=self.init)
            n_times = self.precision_.shape[0]
            theta_old = np.zeros(n_times * (n_times - 1) // 2)
            # idx = np.triu_indices(n_times, 1)
            kernel = np.eye(n_times)

            psi, _, _ = check_norm_prox(self.psi)
            if self.n_clusters is None:
                self.n_clusters = n_times

            for i in range(self.max_iter_ext):
                # E step - discover best kernel
                # , method='bounded'bounds=[(0, None)]*theta_old.size
                # theta = minimize(
                #     objective_similarity, theta_old,
                #     args=(self.precision_, self.classes_[:, None], psi)
                #     ).x
                # theta -= np.min(theta)
                # theta /= np.max(theta)
                theta = precision_similarity(self.precision_, psi)

                # if i > 0 and np.linalg.norm(theta_old -
                #                             theta) / theta.size < self.eps:
                #     break

                # kernel[idx] = theta
                # kernel[idx[::-1]] = theta
                kernel = theta

                labels_pred = AgglomerativeClustering(
                    n_clusters=self.n_clusters,
                    affinity="precomputed",
                    linkage="complete").fit_predict(kernel)
                if i > 0 and np.linalg.norm(labels_pred - labels_pred_old
                                            ) / labels_pred.size < self.eps:
                    break
                kernel = kernels.RBF(0.0001)(
                    labels_pred[:, None]) + kernels.RBF(self.beta)(
                        np.arange(n_times)[:, None])

                # normalize_matrix(kernel_sum)
                # kernel += kerne * self.beta

                # M step - fix the kernel matrix
                out = kernel_time_graphical_lasso(
                    emp_cov,
                    alpha=self.alpha,
                    rho=self.rho,
                    kernel=kernel,
                    n_samples=n_samples,
                    tol=self.tol,
                    rtol=self.rtol,
                    psi=self.psi,
                    max_iter=self.max_iter,
                    verbose=self.verbose,
                    return_n_iter=True,
                    return_history=self.return_history,
                    update_rho_options=self.update_rho_options,
                    compute_objective=self.compute_objective,
                    init=self.precision_,
                )

                if self.return_history:
                    (self.precision_, self.covariance_, self.history_,
                     self.n_iter_) = out
                else:
                    self.precision_, self.covariance_, self.n_iter_ = out
                theta_old = theta
                labels_pred_old = labels_pred
                # kernel = graph_k_means(
                #   list(self.precision_), 3, max_iter=100)
                # self.similarity_matrix = kernel
                # theta_old = kernel
                # if i > 0 and np.linalg.norm(theta_old -
                #                             kernel) / kernel.size < self.eps:
                #     break
            else:
                warnings.warn("theta did not converge.")
            self.similarity_matrix_ = kernel

        else:
            kernel = self.kernel
            if kernel.shape[0] != self.classes_.size:
                raise ValueError(
                    "Kernel size does not match classes of samples, "
                    "got {} classes and kernel has shape {}".format(
                        self.classes_.size, kernel.shape[0]))

            out = kernel_time_graphical_lasso(
                emp_cov,
                alpha=self.alpha,
                rho=self.rho,
                kernel=kernel,
                n_samples=n_samples,
                tol=self.tol,
                rtol=self.rtol,
                psi=self.psi,
                max_iter=self.max_iter,
                verbose=self.verbose,
                return_n_iter=True,
                return_history=self.return_history,
                update_rho_options=self.update_rho_options,
                compute_objective=self.compute_objective,
                init=self.init,
            )
            if self.return_history:
                (self.precision_, self.covariance_, self.history_,
                 self.n_iter_) = out
            else:
                self.precision_, self.covariance_, self.n_iter_ = out

        return self
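
A small, hedged illustration of how the similarity kernel in the E step above is assembled: an RBF with a tiny length scale over the cluster labels acts as a within-cluster indicator, and a wider RBF over the raw time indices adds smooth temporal similarity. It assumes sklearn.gaussian_process.kernels, imported as kernels like in the method above; the helper name is hypothetical.

import numpy as np
from sklearn.gaussian_process import kernels


def _demo_similarity_kernel(labels_pred, beta=1.0):
    n_times = len(labels_pred)
    labels = np.asarray(labels_pred, dtype=float)[:, None]
    # ~1 for time points in the same cluster, ~0 otherwise
    cluster_part = kernels.RBF(0.0001)(labels)
    # smooth similarity between nearby time indices
    temporal_part = kernels.RBF(beta)(np.arange(n_times, dtype=float)[:, None])
    return cluster_part + temporal_part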
    def _fit(self, emp_cov, n_samples):
        if self.ker_param == "auto":
            from scipy.optimize import minimize_scalar

            if not callable(self.kernel):
                raise ValueError(
                    "kernel should be a function if ker_param=='auto'")
            # discover best kernel parameter via EM
            # initialise precision matrices, as warm start
            self.precision_ = init_precision(emp_cov, mode=self.init)
            theta_old = 0
            for i in range(self.max_iter_ext):
                # E step - discover best kernel parameter
                theta = minimize_scalar(
                    objective_kernel,
                    args=(self.precision_, self.psi, self.kernel,
                          self.classes_[:, None]),
                    bounds=(0, emp_cov.shape[0]),
                    method="bounded",
                ).x

                if i > 0 and abs(theta_old - theta) < 1e-5:
                    break
                else:
                    print("Find new theta: %f" % theta)

                # M step
                try:
                    # this works if it is a ExpSineSquared or RBF kernel
                    kernel = self.kernel(length_scale=theta)(
                        self.classes_[:, None])
                except TypeError:
                    # maybe it's a ConstantKernel
                    kernel = self.kernel(constant_value=theta)(
                        self.classes_[:, None])

                out = kernel_time_graphical_lasso(
                    emp_cov,
                    alpha=self.alpha,
                    rho=self.rho,
                    kernel=kernel,
                    n_samples=n_samples,
                    tol=self.tol,
                    rtol=self.rtol,
                    psi=self.psi,
                    max_iter=self.max_iter,
                    verbose=self.verbose,
                    return_n_iter=True,
                    return_history=self.return_history,
                    update_rho_options=self.update_rho_options,
                    compute_objective=self.compute_objective,
                    init=self.precision_,
                )
                if self.return_history:
                    (self.precision_, self.covariance_, self.history_,
                     self.n_iter_) = out
                else:
                    self.precision_, self.covariance_, self.n_iter_ = out
                theta_old = theta
            else:
                print("warning: theta not converged")

        else:
            if callable(self.kernel):
                try:
                    # this works if it is a ExpSineSquared or RBF kernel
                    kernel = self.kernel(length_scale=self.ker_param)(
                        self.classes_[:, None])
                except TypeError:
                    # maybe it's a ConstantKernel
                    kernel = self.kernel(constant_value=self.ker_param)(
                        self.classes_[:, None])
            else:
                kernel = self.kernel
                if kernel.shape[0] != self.classes_.size:
                    raise ValueError(
                        "Kernel size does not match classes of samples, "
                        "got {} classes and kernel has shape {}".format(
                            self.classes_.size, kernel.shape[0]))

            out = kernel_time_graphical_lasso(
                emp_cov,
                alpha=self.alpha,
                rho=self.rho,
                kernel=kernel,
                n_samples=n_samples,
                tol=self.tol,
                rtol=self.rtol,
                psi=self.psi,
                max_iter=self.max_iter,
                verbose=self.verbose,
                return_n_iter=True,
                return_history=self.return_history,
                update_rho_options=self.update_rho_options,
                compute_objective=self.compute_objective,
                init=self.init,
            )
            if self.return_history:
                (self.precision_, self.covariance_, self.history_,
                 self.n_iter_) = out
            else:
                self.precision_, self.covariance_, self.n_iter_ = out

        return self
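
A hedged sketch of the kernel construction used in the M step above: a callable sklearn kernel such as ExpSineSquared or RBF, parametrised by length_scale=theta and evaluated on the class labels, gives the (n_times, n_times) matrix passed to kernel_time_graphical_lasso. The kernel choice and helper name below are illustrative.

import numpy as np
from sklearn.gaussian_process.kernels import ExpSineSquared


def _demo_class_kernel(classes, theta=1.0):
    classes = np.asarray(classes, dtype=float)[:, None]
    # (n_times, n_times) similarity between the time labels
    return ExpSineSquared(length_scale=theta)(classes)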
def latent_time_graphical_lasso(emp_cov,
                                alpha=0.01,
                                tau=1.,
                                rho=1.,
                                beta=1.,
                                eta=1.,
                                max_iter=100,
                                n_samples=None,
                                verbose=False,
                                psi='laplacian',
                                phi='laplacian',
                                mode='admm',
                                tol=1e-4,
                                rtol=1e-4,
                                return_history=False,
                                return_n_iter=True,
                                update_rho_options=None,
                                compute_objective=True,
                                init='empirical'):
    r"""Latent variable time-varying graphical lasso solver.

    Solves the following problem via ADMM:
      min sum_{i=1}^T -n_i log_likelihood(S_i, K_i-L_i) + alpha ||K_i||_{od,1}
          + tau ||L_i||_*
          + beta sum_{i=2}^T Psi(K_i - K_{i-1})
          + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data for each time point.
    alpha, tau, beta, eta : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Whether to compute the objective value at each iteration.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]
    Z_2 = Z_0.copy()[1:]
    W_0 = np.zeros_like(Z_0)
    W_1 = np.zeros_like(Z_1)
    W_2 = np.zeros_like(Z_2)

    X_0 = np.zeros_like(Z_0)
    X_1 = np.zeros_like(Z_1)
    X_2 = np.zeros_like(Z_2)
    U_1 = np.zeros_like(W_1)
    U_2 = np.zeros_like(W_2)

    R_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)
    W_1_old = np.zeros_like(W_1)
    W_2_old = np.zeros_like(W_2)

    # divisor for consensus variables; the first and last time points have
    # one fewer temporal consensus term than the interior ones
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        A += A.transpose(0, 2, 1)
        A /= 2.
        A *= -rho / n_samples[:, None, None]
        A += emp_cov
        # A = emp_cov / rho - A

        R = np.array(
            [prox_logdet(a, lamda=ni / rho) for a, ni in zip(A, n_samples)])

        # update Z_0
        A = R + W_0 + X_0
        A[:-1] += Z_1 - X_1
        A[1:] += Z_2 - X_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # Z_0 = np.array(map(soft_thresholding_, A))
        Z_0 = soft_thresholding(A,
                                lamda=alpha / (rho * divisor[:, None, None]))

        # update Z_1, Z_2
        A_1 = Z_0[:-1] + X_1
        A_2 = Z_0[1:] + X_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * beta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update W_0
        A = Z_0 - R - X_0
        A[:-1] += W_1 - U_1
        A[1:] += W_2 - U_2
        A /= divisor[:, None, None]
        A += A.transpose(0, 2, 1)
        A /= 2.

        W_0 = np.array([
            prox_trace_indicator(a, lamda=tau / (rho * div))
            for a, div in zip(A, divisor)
        ])

        # update W_1, W_2
        A_1 = W_0[:-1] + U_1
        A_2 = W_0[1:] + U_2
        if not phi_node_penalty:
            prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
            W_1 = .5 * (A_1 + A_2 - prox_e)
            W_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            W_1, W_2 = prox_phi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * eta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        X_0 += R - Z_0 + W_0
        X_1 += Z_0[:-1] - Z_1
        X_2 += Z_0[1:] - Z_2
        U_1 += W_0[:-1] - W_1
        U_2 += W_0[1:] - W_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + squared_norm(Z_0[:-1] - Z_1) +
            squared_norm(Z_0[1:] - Z_2) + squared_norm(W_0[:-1] - W_1) +
            squared_norm(W_0[1:] - W_2))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old) + squared_norm(W_1 - W_1_old) +
            squared_norm(W_2 - W_2_old))

        obj = objective(emp_cov, n_samples, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
                        alpha, tau, beta, eta, psi, phi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(R) + squared_norm(Z_1) + squared_norm(Z_2) +
                    squared_norm(W_1) + squared_norm(W_2)),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + squared_norm(Z_0[:-1]) +
                    squared_norm(Z_0[1:]) + squared_norm(W_0[:-1]) +
                    squared_norm(W_0[1:]))),
            e_dual=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * rho *
            (np.sqrt(
                squared_norm(X_0) + squared_norm(X_1) + squared_norm(X_2) +
                squared_norm(U_1) + squared_norm(U_2))))

        R_old = R.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()
        W_1_old = W_1.copy()
        W_2_old = W_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        X_1 *= rho / rho_new
        X_2 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, W_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
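
A minimal, hedged usage sketch for the latent-variable solver above. With the default flags it returns the sparse component K, the low-rank component L, the covariance matrices, and the iteration count; the regularisation values are illustrative only.

def _demo_latent_time_graphical_lasso(emp_cov, n_samples=None):
    # emp_cov has shape (n_times, n_features, n_features); values are illustrative
    K, L, covariance, n_iter = latent_time_graphical_lasso(
        emp_cov, alpha=0.1, tau=1.0, beta=0.5, eta=0.5, n_samples=n_samples)
    return K, L, covariance, n_iter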
Example #6
    def fit(self, X, y):
        # Covariance does not make sense for a single feature
        X, y = check_X_y(
            X, y, accept_sparse=False, dtype=np.float64, order="C",
            ensure_min_features=2, estimator=self)

        self.classes_, n_samples = np.unique(y, return_counts=True)
        self.data = X.copy()
        if np.unique(self.data).size != 2:
            raise ValueError(
                "With the Ising distribution the data must contain exactly "
                "two values, either 0 and 1 or -1 and 1.")
        X = np.array([X[y == cl] for cl in self.classes_])

        if self.ker_param == "auto":
            from scipy.optimize import minimize_scalar

            if not callable(self.kernel):
                raise ValueError("kernel should be a function if ker_param=='auto'")
            # discover best kernel parameter via alternating minimization
            # initialise precision matrices, as warm start
            self.precision_ = init_precision(X, mode=self.init)
            theta_old = 0
            for i in range(self.max_iter_ext):
                # E step - discover best kernel parameter
                theta = minimize_scalar(
                    objective_kernel,
                    args=(self.precision_, self.psi, self.kernel, self.classes_[:, None]),
                    bounds=(0, X.shape[0]),
                    method="bounded",
                ).x

                if i > 0 and abs(theta_old - theta) < 1e-5:
                    break
                else:
                    print("Find new theta: %f" % theta)

                # M step
                try:
                    # this works if it is a ExpSineSquared or RBF kernel
                    kernel = self.kernel(length_scale=theta)(self.classes_[:, None])
                except TypeError:
                    # maybe it's a ConstantKernel
                    kernel = self.kernel(constant_value=theta)(self.classes_[:, None])

                out = _fit_time_ising_model(
                    X,
                    alpha=self.alpha,
                    rho=self.rho,
                    kernel=kernel,
                    tol=self.tol,
                    rtol=self.rtol,
                    psi=self.psi,
                    max_iter=self.max_iter,
                    verbose=self.verbose,
                    return_n_iter=True,
                    return_history=self.return_history,
                    compute_objective=self.compute_objective,
                    n_cores=self.n_cores,
                )
                if self.return_history:
                    self.precision_, self.history_, self.n_iter_ = out
                else:
                    self.precision_, self.n_iter_ = out
                theta_old = theta
            else:
                print("warning: theta not converged")

        else:
            if callable(self.kernel):
                try:
                    # this works if it is a ExpSineSquared or RBF kernel
                    kernel = self.kernel(length_scale=self.ker_param)(self.classes_[:, None])
                except TypeError:
                    # maybe it's a ConstantKernel
                    kernel = self.kernel(constant_value=self.ker_param)(self.classes_[:, None])
            else:
                kernel = self.kernel
                if kernel.shape[0] != self.classes_.size:
                    raise ValueError(
                        "Kernel size does not match classes of samples, "
                        "got {} classes and kernel has shape {}".format(self.classes_.size, kernel.shape[0])
                    )
            out = _fit_time_ising_model(
                X,
                alpha=self.alpha,
                rho=self.rho,
                kernel=kernel,
                tol=self.tol,
                rtol=self.rtol,
                psi=self.psi,
                max_iter=self.max_iter,
                verbose=self.verbose,
                return_n_iter=True,
                return_history=self.return_history,
                compute_objective=self.compute_objective,
            )
            if self.return_history:
                self.precision_, self.history_, self.n_iter_ = out
            else:
                self.precision_, self.n_iter_ = out

        return self