Example 1
def cholesky(A, sparse=True, verbose=True):
    """
    Choose the best possible Cholesky factorizer.

    If possible, uses the Scikit-Sparse sparse Cholesky method and
    permutes the output L to ensure A = L.H . L.

    Otherwise, defaults to SciPy's dense version.

    Parameters
    ----------
    A : array-like
        array to decompose
    sparse : boolean, default: True
        whether to return a sparse array
    verbose : bool, default: True
        whether to print warnings
    """
    if SKSPIMPORT:
        A = sp.sparse.csc_matrix(A)
        try:
            F = spcholesky(A)

            # permutation matrix P
            P = sp.sparse.lil_matrix(A.shape)
            p = F.P()
            P[np.arange(len(p)), p] = 1

            # permute
            L = F.L()
            L = P.T.dot(L)
        except CholmodNotPositiveDefiniteError as e:
            raise NotPositiveDefiniteError('Matrix is not positive definite') from e

        if sparse:
            return L.T  # upper-triangular factor, sparse
        return L.T.toarray()  # upper-triangular factor, dense

    else:
        msg = 'Could not import Scikit-Sparse or Suite-Sparse.\n'\
              'This will slow down optimization for models with '\
              'monotonicity/convexity penalties and many splines.\n'\
              'See installation instructions for installing '\
              'Scikit-Sparse and Suite-Sparse via Conda.'
        if verbose:
            warnings.warn(msg)

        if sp.sparse.issparse(A):
            A = A.toarray()

        try:
            L = sp.linalg.cholesky(A, lower=False)
        except LinAlgError as e:
            raise NotPositiveDefiniteError('Matrix is not positive definite') from e

        if sparse:
            return sp.sparse.csc_matrix(L)
        return L
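
Both examples assume module-level imports plus a guarded Scikit-Sparse import that defines SKSPIMPORT, spcholesky, and CholmodNotPositiveDefiniteError. A minimal sketch of that surrounding context (the NotPositiveDefiniteError class here is a plausible stand-in, not the original module's definition):

import warnings

import numpy as np
import scipy as sp
from scipy.linalg import LinAlgError

try:
    # scikit-sparse wraps SuiteSparse's CHOLMOD routines
    from sksparse.cholmod import cholesky as spcholesky
    from sksparse.cholmod import CholmodNotPositiveDefiniteError
    SKSPIMPORT = True
except ImportError:
    SKSPIMPORT = False


class NotPositiveDefiniteError(ValueError):
    """Raised when a matrix is found not to be positive definite."""
    pass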
Example 2
def cholesky(A, sparse=True):
    """
    Choose the best possible Cholesky factorizer.

    If possible, uses the Scikit-Sparse sparse Cholesky method and
    permutes the output L to ensure A = L . L.H.

    Otherwise, defaults to NumPy's dense version.

    Parameters
    ----------
    A : array-like
        array to decompose
    sparse : boolean, default: True
        whether to return a sparse array
    """
    if SKSPIMPORT:
        A = sp.sparse.csc_matrix(A)
        F = spcholesky(A)

        # permutation matrix P
        P = sp.sparse.lil_matrix(A.shape)
        p = F.P()
        P[np.arange(len(p)), p] = 1

        # permute
        try:
            L = F.L()
            L = P.T.dot(L)
        except CholmodNotPositiveDefiniteError as e:
            raise NotPositiveDefiniteError('Matrix is not positive definite') from e

        if sparse:
            return L
        return L.toarray()

    else:
        msg = 'Could not import Scikit-Sparse or Suite-Sparse.\n'\
              'This will slow down optimization for models with '\
              'monotonicity/convexity penalties and many splines.\n'\
              'See installation instructions for installing '\
              'Scikit-Sparse and Suite-Sparse via Conda.'
        warnings.warn(msg)

        if sp.sparse.issparse(A):
            A = A.toarray()

        try:
            L = np.linalg.cholesky(A)
        except LinAlgError as e:
            raise NotPositiveDefiniteError('Matrix is not positive definite') from e

        if sparse:
            return sp.sparse.csc_matrix(L)
        return L
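
Note the difference between the two examples: Example 1 returns the upper-triangular factor (A = L.H . L), while Example 2 returns the permuted lower-triangular factor (A = L . L.H). A quick sanity check, assuming Example 1 is bound as cholesky_upper and Example 2 as cholesky_lower (hypothetical names, since both are defined as cholesky):

import numpy as np

rng = np.random.default_rng(0)
B = rng.standard_normal((5, 5))
A = B @ B.T + 5.0 * np.eye(5)  # symmetric positive definite

U = np.asarray(cholesky_upper(A, sparse=False, verbose=False))
assert np.allclose(U.T @ U, A)  # Example 1: A = U.H . U

L = np.asarray(cholesky_lower(A, sparse=False))
assert np.allclose(L @ L.T, A)  # Example 2: A = L . L.H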
Example 3
    def fit(self, X, y, w=None):
        """
            Run Forest!

            Args:
                X (m x n): dataset
                y (n, 1): labels (real)

            Returns:
                w (n): estimated parameters
                t (float): elapsed time
        """
        assert X.shape[0] == y.shape[0]
        y = y.reshape(-1, 1)  # keep y as a column; reshape does not act in place

        # initializing
        if w is None:
            w = np.zeros((X.shape[1], 1))
        else:
            w = np.asarray(w).reshape(-1, 1)  # ensure a column vector
        z_1 = np.zeros((X.shape[1], 1))
        z_2 = np.zeros((X.shape[1], 1))
        u_1 = np.zeros((X.shape[1], 1))  # w - z_1
        u_2 = np.zeros((X.shape[1], 1))  # w - z_2

        # ADMM loop
        keep_going = True
        n_iter = 0
        hist = list()
        prim_res = list()
        start = time.time()

        # cache factorization and other pre-computable terms
        if self.cache:
            if self.sparse:
                temp = scipy.sparse.csc_matrix(np.dot(X.T, X))
                L = spcholesky(temp, beta=self.rho)
            else:
                L = cholesky(np.dot(X.T, X) + self.rho * np.eye(X.shape[1]),
                             lower=True)
            q = np.dot(X.T, y)
            q.shape = (q.size, 1)

        while keep_going and (n_iter <= self.max_iter):
            n_iter += 1
            # w-update step
            if self.cache:
                g = 0.5 * self.rho * (z_1 + z_2 - u_1 - u_2) + q
                # back-solving
                if self.sparse:
                    # CHOLMOD factor: solve_A solves (X.T X + rho I) w = g,
                    # handling the fill-reducing permutation internally
                    w = L.solve_A(g)
                else:
                    w = scipy.linalg.solve(L.T, scipy.linalg.solve(L, g))
            else:

                # ADMM w-update: f(w) + (rho/2) * ||w - z_i + u_i||^2
                def fun(var):
                    var = var.reshape(-1, 1)  # minimize passes a flat array
                    temp_1 = (0.5 * self.rho) * np.linalg.norm(var - z_1 + u_1) ** 2
                    temp_2 = (0.5 * self.rho) * np.linalg.norm(var - z_2 + u_2) ** 2
                    return f(X, y, var) + temp_1 + temp_2

                opt_resul = minimize(fun, w.ravel(), method='BFGS')
                w = opt_resul.x.reshape(-1, 1)

            # z_i update step - these sub-steps can be done in parallel
            z_1_old = z_1.copy()
            z_2_old = z_2.copy()

            x1_hat = self.alpha * w + (1 - self.alpha) * z_1_old
            z_1 = prxopt.shrinkage(x1_hat + u_1, self.lamb / self.rho)

            x2_hat = self.alpha * w + (1 - self.alpha) * z_2_old
            z_2 = prxopt.proximal_constrained(x2_hat + u_2)  # projection onto C

            # dual variables update step
            u_1 = u_1 + (x1_hat - z_1)  # dual variable for the 1st constraint
            u_2 = u_2 + (x2_hat - z_2)  # dual variable for the 2nd constraint

            # monitor costs over time
            if PLOT_COST:
                hist.append(((np.dot(X, w) - y)**2).sum() + self.lamb *
                            (abs(w).sum()))

            # stopping criterion
            primal_res = 0.5 * ((w - z_1) + (w - z_2))
            primal_res_norm = np.linalg.norm(primal_res)

            # monitor primal residual over time
            prim_res.append(primal_res_norm)

            dual_res = 0.5 * ((z_1 - z_1_old) + (z_2 - z_2_old))
            dual_res_norm = np.linalg.norm(self.rho * dual_res)
            eps_pri = np.sqrt(X.shape[1]) * ABS_TOL + REL_TOL * \
                          max(np.linalg.norm(w),
                              0.5 * (np.linalg.norm(z_1) + np.linalg.norm(z_2)))
            eps_dual = np.sqrt(X.shape[1]) * ABS_TOL + REL_TOL * 0.5 * \
                        (np.linalg.norm(u_1) + np.linalg.norm(u_2))
            # vary rho, following Boyd et al., 2011, ADMM, p. 20
            if self.vary:
                if primal_res_norm > self.mu * dual_res_norm:
                    self.rho *= self.tau_incr
                    u_1 = u_1 / self.tau_incr
                    u_2 = u_2 / self.tau_incr
                elif dual_res_norm > self.mu * primal_res_norm:
                    self.rho *= (1 / self.tau_decr)
                    u_1 = u_1 * self.tau_decr
                    u_2 = u_2 * self.tau_decr

            # print convergence information?
            if VERBOSE:
                message = """|ADMM it.{} |r_norm: {:8.3f} |eps_pri: {:1.3f}
                          |s_norm: {:8.3f} |eps_dual: {:1.3f} |obj: {:8.3f}"""

                value = ((1/2) * np.linalg.norm(np.dot(X, w) - y) ** 2 +\
                                self.lamb * np.sum(abs(w)))
                print(
                    message.format(n_iter, primal_res_norm, eps_pri,
                                   dual_res_norm, eps_dual, value))

            if primal_res_norm <= eps_pri and dual_res_norm <= eps_dual:
                keep_going = False
                if VERBOSE:
                    print('Primal dual stopping criterion met.')

        # make sure the solution is feasible (w_i >= 0, i = 1, ..., d)
        # check whether constraint satisfaction is needed when stopping early
        # (small max_iter); if not, it may be better to comment this out,
        # since the projection might be too harsh for early-stage solutions
        self.hist = hist
        # w_final = prxopt.proximal_constrained(w)
        w_final = z_2.copy()

        if PLOT_COST:
            # cost function
            plt.subplot(2, 1, 1)
            plt.plot(hist)
            plt.xlabel('iterations')
            plt.ylabel('Cost function')

            # primal residual: average of both residuals (one per constraint)
            plt.subplot(2, 1, 2)
            plt.semilogy(prim_res)
            plt.xlabel('iterations')
            plt.ylabel('Primal residual - Avg')
            plt.show(block=False)
        return w_final.flatten(), time.time() - start
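
The z-updates rely on two proximal operators from the author's prxopt helper module, which is not shown here. A minimal sketch of what they plausibly compute, based on how they are used above (soft-thresholding for the l1 penalty, and projection onto the nonnegative orthant for the w_i >= 0 constraint; these are the standard ADMM building blocks, not the original implementations):

import numpy as np


def shrinkage(x, kappa):
    """Soft-thresholding: the proximal operator of kappa * ||x||_1."""
    return np.maximum(0.0, x - kappa) - np.maximum(0.0, -x - kappa)


def proximal_constrained(x):
    """Euclidean projection onto the nonnegative orthant {w : w >= 0}."""
    return np.maximum(0.0, x)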