Example #1
    def test_spmv_hpmv(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES+COMPLEX_DTYPES):
            n = 3
            A = rand(n, n).astype(dtype)
            if ind > 1:
                A += rand(n, n)*1j
            A = A.astype(dtype)
            A = A + A.T if ind < 4 else A + A.conj().T
            c, r = tril_indices(n)
            Ap = A[r, c]
            x = rand(n).astype(dtype)
            y = rand(n).astype(dtype)
            xlong = arange(2*n).astype(dtype)
            ylong = ones(2*n).astype(dtype)
            alpha, beta = dtype(1.25), dtype(2)

            if ind > 3:
                func, = get_blas_funcs(('hpmv',), dtype=dtype)
            else:
                func, = get_blas_funcs(('spmv',), dtype=dtype)
            y1 = func(n=n, alpha=alpha, ap=Ap, x=x, y=y, beta=beta)
            y2 = alpha * A.dot(x) + beta * y
            assert_array_almost_equal(y1, y2)

            # Test inc and offsets
            y1 = func(n=n-1, alpha=alpha, beta=beta, x=xlong, y=ylong, ap=Ap,
                      incx=2, incy=2, offx=n, offy=n)
            y2 = (alpha * A[:-1, :-1]).dot(xlong[3::2]) + beta * ylong[3::2]
            assert_array_almost_equal(y1[3::2], y2)
            assert_almost_equal(y1[4], ylong[4])
Example #2
    def test_sbmv_hbmv(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES):
            n = 6
            k = 2
            A = zeros((n, n), dtype=dtype)
            Ab = zeros((k+1, n), dtype=dtype)

            # Form the array and its packed banded storage
            A[arange(n), arange(n)] = rand(n)
            for ind2 in range(1, k+1):
                temp = rand(n-ind2)
                A[arange(n-ind2), arange(ind2, n)] = temp
                Ab[-1-ind2, ind2:] = temp
            A = A.astype(dtype)
            A = A + A.T if ind < 2 else A + A.conj().T
            Ab[-1, :] = diag(A)
            x = rand(n).astype(dtype)
            y = rand(n).astype(dtype)
            alpha, beta = dtype(1.25), dtype(3)

            if ind > 1:
                func, = get_blas_funcs(('hbmv',), dtype=dtype)
            else:
                func, = get_blas_funcs(('sbmv',), dtype=dtype)
            y1 = func(k=k, alpha=alpha, a=Ab, x=x, y=y, beta=beta)
            y2 = alpha * A.dot(x) + beta * y
            assert_array_almost_equal(y1, y2)
Example #3
    def test_spr_hpr(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES+COMPLEX_DTYPES):
            n = 3
            A = rand(n, n).astype(dtype)
            if ind > 1:
                A += rand(n, n)*1j
            A = A.astype(dtype)
            A = A + A.T if ind < 4 else A + A.conj().T
            c, r = tril_indices(n)
            Ap = A[r, c]
            x = rand(n).astype(dtype)
            alpha = (DTYPES+COMPLEX_DTYPES)[mod(ind, 4)](2.5)

            if ind > 3:
                func, = get_blas_funcs(('hpr',), dtype=dtype)
                y2 = alpha * x[:, None].dot(x[None, :].conj()) + A
            else:
                func, = get_blas_funcs(('spr',), dtype=dtype)
                y2 = alpha * x[:, None].dot(x[None, :]) + A

            y1 = func(n=n, alpha=alpha, ap=Ap, x=x)
            y1f = zeros((3, 3), dtype=dtype)
            y1f[r, c] = y1
            y1f[c, r] = y1.conj() if ind > 3 else y1
            assert_array_almost_equal(y1f, y2)
Example #4
    def test_spr2_hpr2(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES):
            n = 3
            A = rand(n, n).astype(dtype)
            if ind > 1:
                A += rand(n, n)*1j
            A = A.astype(dtype)
            A = A + A.T if ind < 2 else A + A.conj().T
            c, r = tril_indices(n)
            Ap = A[r, c]
            x = rand(n).astype(dtype)
            y = rand(n).astype(dtype)
            alpha = dtype(2)

            if ind > 1:
                func, = get_blas_funcs(('hpr2',), dtype=dtype)
            else:
                func, = get_blas_funcs(('spr2',), dtype=dtype)

            u = alpha.conj() * x[:, None].dot(y[None, :].conj())
            y2 = A + u + u.conj().T
            y1 = func(n=n, alpha=alpha, x=x, y=y, ap=Ap)
            y1f = zeros((3, 3), dtype=dtype)
            y1f[r, c] = y1
            y1f[[1, 2, 2], [0, 0, 1]] = y1[[1, 3, 4]].conj()
            assert_array_almost_equal(y1f, y2)
Example #5
def _have_blas_gemm():
    try:
        linalg.get_blas_funcs(["gemm"])
        return True
    except (AttributeError, ValueError):
        warnings.warn("Could not import BLAS, falling back to np.dot")
        return False
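
A hedged usage sketch for the guard above (the arrays A and B and the fallback are illustrative, not from the source):

import numpy as np
from scipy import linalg

A = np.random.rand(4, 3)
B = np.random.rand(3, 2)
if _have_blas_gemm():
    gemm, = linalg.get_blas_funcs(("gemm",), (A, B))
    C = gemm(1.0, A, B)  # computes 1.0 * A @ B via BLAS
else:
    C = np.dot(A, B)     # pure-numpy fallback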
Example #6
def test_get_blas_funcs():
    # check that it returns Fortran code for arrays that are
    # fortran-ordered
    f1, f2, f3 = get_blas_funcs(
        ('axpy', 'axpy', 'axpy'),
        (np.empty((2,2), dtype=np.complex64, order='F'),
         np.empty((2,2), dtype=np.complex128, order='C'))
        )

    # get_blas_funcs will choose libraries depending on most generic
    # array
    assert_equal(f1.typecode, 'z')
    assert_equal(f1.module_name, 'cblas')
    assert_equal(f2.typecode, 'z')
    assert_equal(f2.module_name, 'cblas')

    # check defaults.
    f1 = get_blas_funcs('rotg')
    assert_equal(f1.typecode, 'd')

    # check also dtype interface
    f1 = get_blas_funcs('gemm', dtype=np.complex64)
    assert_equal(f1.typecode, 'c')
    f1 = get_blas_funcs('gemm', dtype='F')
    assert_equal(f1.typecode, 'c')
Example #7
def test_get_blas_funcs_alias():
    # check alias for get_blas_funcs
    f, g = get_blas_funcs(('nrm2', 'dot'), dtype=np.complex64)
    assert f.typecode == 'c'
    assert g.typecode == 'c'

    f, g, h = get_blas_funcs(('dot', 'dotc', 'dotu'), dtype=np.float64)
    assert f is g
    assert f is h
Example #9
def py_rect_maxvol(A, tol=1., maxK=None, min_add_K=None, minK=None,
                   start_maxvol_iters=10, identity_submatrix=True):
    """Python implementation of rectangular 2-volume maximization.
    For information see :py:func:`rect_maxvol` function."""
    # tol2 - square of parameter tol
    tol2 = tol**2
    # N - number of rows, r - number of columns of matrix A
    N, r = A.shape
    if N <= r:
        return np.arange(N, dtype=np.int32), np.eye(N, dtype=A.dtype)
    if maxK is None or maxK > N:
        maxK = N
    if maxK < r:
        maxK = r
    if minK is None or minK < r:
        minK = r
    if minK > N:
        minK = N
    if min_add_K is not None:
        minK = max(minK, r + min_add_K)
    if minK > maxK:
        minK = maxK
        # raise ValueError('minK value cannot be greater than maxK value')
    index = np.zeros(N, dtype=np.int32)
    chosen = np.ones(N)
    tmp_index, C = py_maxvol(A, tol=1, max_iters=start_maxvol_iters)
    index[:r] = tmp_index
    chosen[tmp_index] = 0
    C = np.asfortranarray(C)
    # compute square 2-norms of each row in matrix C
    row_norm_sqr = np.array([chosen[i]*np.linalg.norm(C[i], 2)**2
                             for i in range(N)])
    # find maximum value in row_norm_sqr
    i = np.argmax(row_norm_sqr)
    K = r
    # use cgeru/zgeru for complex matrices and sger/dger for real ones
    try:
        ger = get_blas_funcs('geru', [C])
    except ValueError:
        ger = get_blas_funcs('ger', [C])
    while (row_norm_sqr[i] > tol2 and K < maxK) or K < minK:
        # add i to index and recompute C and the squared row norms
        # by the Sherman-Morrison formula
        index[K] = i
        chosen[i] = 0
        c = C[i].copy()
        v = C.dot(c.conj())
        l = 1.0/(1+v[i])
        ger(-l, v, c, a=C, overwrite_a=1)
        C = np.hstack([C, l*v.reshape(-1, 1)])
        row_norm_sqr -= (l*v*v.conj()).real
        row_norm_sqr *= chosen
        # find maximum value in row_norm_sqr
        i = row_norm_sqr.argmax()
        K += 1
    if identity_submatrix:
        C[index[:K]] = np.eye(K, dtype=C.dtype)
    return index[:K].copy(), C
Example #10
def test_fast_dot():
    """Check fast dot blas wrapper function"""
    rng = np.random.RandomState(42)
    A = rng.random_sample([2, 10])
    B = rng.random_sample([2, 10])

    try:
        linalg.get_blas_funcs('gemm')
        has_blas = True
    except (AttributeError, ValueError):
        has_blas = False
Example #11
def test_inplace_swap_column():
    X = np.array([[0, 3, 0],
                  [2, 4, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float64)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)

    swap = linalg.get_blas_funcs(('swap',), (X,))
    swap = swap[0]
    X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1])
    inplace_swap_column(X_csr, 0, -1)
    inplace_swap_column(X_csc, 0, -1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())

    X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1])
    inplace_swap_column(X_csr, 0, 1)
    inplace_swap_column(X_csc, 0, 1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    assert_raises(TypeError, inplace_swap_column, X_csr.tolil())

    X = np.array([[0, 3, 0],
                  [2, 4, 0],
                  [0, 0, 0],
                  [9, 8, 7],
                  [4, 0, 5]], dtype=np.float32)
    X_csr = sp.csr_matrix(X)
    X_csc = sp.csc_matrix(X)
    swap = linalg.get_blas_funcs(('swap',), (X,))
    swap = swap[0]
    X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1])
    inplace_swap_column(X_csr, 0, -1)
    inplace_swap_column(X_csc, 0, -1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1])
    inplace_swap_column(X_csr, 0, 1)
    inplace_swap_column(X_csc, 0, 1)
    assert_array_equal(X_csr.toarray(), X_csc.toarray())
    assert_array_equal(X, X_csc.toarray())
    assert_array_equal(X, X_csr.toarray())
    assert_raises(TypeError, inplace_swap_column, X_csr.tolil())
Example #12
    def test_gbmv(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES):
            n = 7
            m = 5
            kl = 1
            ku = 2
            # fake a banded matrix via toeplitz
            A = toeplitz(append(rand(kl+1), zeros(m-kl-1)),
                         append(rand(ku+1), zeros(n-ku-1)))
            A = A.astype(dtype)
            Ab = zeros((kl+ku+1, n), dtype=dtype)

            # Form the banded storage
            Ab[2, :5] = A[0, 0]  # diag
            Ab[1, 1:6] = A[0, 1]  # sup1
            Ab[0, 2:7] = A[0, 2]  # sup2
            Ab[3, :4] = A[1, 0]  # sub1

            x = rand(n).astype(dtype)
            y = rand(m).astype(dtype)
            alpha, beta = dtype(3), dtype(-5)

            func, = get_blas_funcs(('gbmv',), dtype=dtype)
            y1 = func(m=m, n=n, ku=ku, kl=kl, alpha=alpha, a=Ab,
                      x=x, y=y, beta=beta)
            y2 = alpha * A.dot(x) + beta * y
            assert_array_almost_equal(y1, y2)
Example #13
    def _update_dict_slow(self, subset, D_range):
        """Update dictionary from statistic
        Parameters
        ----------
        subset: ndarray (len_subset),
            Mask used on X

        """
        D_subset = self.D_[:, subset]

        if self.projection == "full":
            norm = enet_norm(self.D_, self.l1_ratio)
        else:
            norm = enet_norm(D_subset, self.l1_ratio)
        R = self.B_[:, subset] - np.dot(D_subset.T, self.A_).T

        ger, = linalg.get_blas_funcs(("ger",), (self.A_, D_subset))
        for k in D_range:
            ger(1.0, self.A_[k], D_subset[k], a=R, overwrite_a=True)
            # R += np.dot(stat.A[:, j].reshape(n_components, 1),
            D_subset[k] = R[k] / (self.A_[k, k])
            if self.projection == "full":
                self.D_[k][subset] = D_subset[k]
                self.D_[k] = enet_projection(self.D_[k], norm[k], self.l1_ratio)
                D_subset[k] = self.D_[k][subset]
            else:
                D_subset[k] = enet_projection(D_subset[k], norm[k], self.l1_ratio)
            ger(-1.0, self.A_[k], D_subset[k], a=R, overwrite_a=True)
            # R -= np.dot(stat.A[:, j].reshape(n_components, 1),
        if self.projection == "partial":
            self.D_[:, subset] = D_subset
Example #14
def axpy(x, y, a=1.0):
    """
    Quick level-1 call to BLAS
    y = a*x+y

    Parameters
    ----------
    x : array_like
        nx1 real or complex vector
    y : array_like
        nx1 real or complex vector
    a : float
        real or complex scalar

    Returns
    -------
    y : array_like
        Input variable y is rewritten

    Notes
    -----
    The call to get_blas_funcs automatically determines the prefix for the blas
    call.
    """
    from scipy.linalg import get_blas_funcs

    fn = get_blas_funcs(['axpy'], [x, y])[0]
    # axpy's third positional argument is n, so pass the scalar by keyword
    return fn(x, y, a=a)
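
A quick usage sketch (values illustrative, not from the source):

import numpy as np

x = np.ones(4)
y = np.arange(4, dtype=float)
print(axpy(x, y, a=2.0))  # [2. 3. 4. 5.], i.e. 2*x + y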
Example #15
def cool_syrk(fact, X):
    # symmetric rank-k update: R = fact * X.dot(X.T); BLAS fills only one
    # triangle (the upper one by default)
    syrk = get_blas_funcs("syrk", [X])
    R = syrk(fact, X)
    d = np.diag(R).copy()
    # pack the upper triangle of R in Fortran order (external helper)
    size = mat_to_upper_F(R)
    R.resize([size])
    return R, d
Example #16
def _update_dict_slow(X, A, B, G, Q, Q_idx, idx, fit_intercept,
                      components_range, norm, impute=True):
    Q_idx = Q[:, idx]

    if impute:
        old_sub_G = Q_idx.dot(Q_idx.T)

    ger, = linalg.get_blas_funcs(('ger',), (A, Q_idx))
    R = B[:, idx] - np.dot(Q_idx.T, A).T

    # norm = np.sqrt(np.sum(Q_idx ** 2, axis=1))
    norm = np.sqrt(np.sum(Q ** 2, axis=1))
    # Intercept on first column
    for j in components_range:
        ger(1.0, A[j], Q_idx[j], a=R, overwrite_a=True)
        Q_idx[j] = R[j] / A[j, j]
        # new_norm = np.sqrt(np.sum(Q_idx[j] ** 2))
        # if new_norm > norm[j]:
        #     Q_idx[j] /= new_norm / norm[j]
        Q[j, idx] = Q_idx[j]
        new_norm = np.sqrt(np.sum(Q[j] ** 2))
        if new_norm > 1:
            Q_idx[j] /= new_norm
            Q[j] /= new_norm

        ger(-1.0, A[j], Q_idx[j], a=R, overwrite_a=True)

    Q[:, idx] = Q_idx

    if impute:
        G += Q_idx.dot(Q_idx.T) - old_sub_G
Example #17
def dot(A, B, out=None):
    """
    A drop-in replacement for numpy.dot.
    Computes A.B optimized using fblas calls.
    """
    import scipy.linalg as sp
    gemm = sp.get_blas_funcs('gemm', arrays=(A, B))

    if out is None:
        lda, x, y, ldb = A.shape + B.shape
        if x != y:
            raise ValueError("matrices are not aligned")
        dtype = np.promote_types(A.dtype, B.dtype)
        out = np.empty((lda, ldb), dtype, order='F')

    # gemm expects Fortran order; pass transposed views of C-ordered inputs
    if A.flags.c_contiguous and B.flags.c_contiguous:
        gemm(alpha=1., a=A.T, b=B.T, trans_a=True, trans_b=True, c=out,
             overwrite_c=True)
    elif A.flags.c_contiguous and B.flags.f_contiguous:
        gemm(alpha=1., a=A.T, b=B, trans_a=True, c=out, overwrite_c=True)
    elif A.flags.f_contiguous and B.flags.c_contiguous:
        gemm(alpha=1., a=A, b=B.T, trans_b=True, c=out, overwrite_c=True)
    else:
        gemm(alpha=1., a=A, b=B, c=out, overwrite_c=True)

    return out
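
A hedged sanity check of the wrapper above (shapes illustrative, not from the source):

import numpy as np

A = np.random.rand(5, 3)                     # C-ordered input
B = np.asfortranarray(np.random.rand(3, 4))  # F-ordered input
assert np.allclose(dot(A, B), np.dot(A, B))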
Example #18
def norm2(q):
    """
    Compute the euclidean norm of an array ``q`` by calling the BLAS routine
    """
    q = np.asarray(q)
    nrm2 = get_blas_funcs('nrm2', dtype=q.dtype)
    return nrm2(q)
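
A brief check of the dtype dispatch above (values illustrative, not from the source):

import numpy as np

print(norm2([3.0, 4.0]))              # 5.0, dispatched to dnrm2
print(norm2(np.array([3.0 + 4.0j])))  # 5.0, dispatched to dznrm2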
Example #19
    def test_trsv(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES):
            n = 15
            A = (rand(n, n)+eye(n)).astype(dtype)
            x = rand(n).astype(dtype)
            func, = get_blas_funcs(('trsv',), dtype=dtype)

            y1 = func(a=A, x=x)
            y2 = solve(triu(A), x)
            assert_array_almost_equal(y1, y2)

            y1 = func(a=A, x=x, lower=1)
            y2 = solve(tril(A), x)
            assert_array_almost_equal(y1, y2)

            y1 = func(a=A, x=x, diag=1)
            A[arange(n), arange(n)] = dtype(1)
            y2 = solve(triu(A), x)
            assert_array_almost_equal(y1, y2)

            y1 = func(a=A, x=x, diag=1, trans=1)
            y2 = solve(triu(A).T, x)
            assert_array_almost_equal(y1, y2)

            y1 = func(a=A, x=x, diag=1, trans=2)
            y2 = solve(triu(A).conj().T, x)
            assert_array_almost_equal(y1, y2)
Example #20
    def test_tpsv(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES):
            n = 10
            x = rand(n).astype(dtype)
            # Upper triangular array
            A = triu(rand(n, n)) if ind < 2 else triu(rand(n, n)+rand(n, n)*1j)
            A += eye(n)
            # Form the packed storage
            c, r = tril_indices(n)
            Ap = A[r, c]
            func, = get_blas_funcs(('tpsv',), dtype=dtype)

            y1 = func(n=n, ap=Ap, x=x)
            y2 = solve(A, x)
            assert_array_almost_equal(y1, y2)

            y1 = func(n=n, ap=Ap, x=x, diag=1)
            A[arange(n), arange(n)] = dtype(1)
            y2 = solve(A, x)
            assert_array_almost_equal(y1, y2)

            y1 = func(n=n, ap=Ap, x=x, diag=1, trans=1)
            y2 = solve(A.T, x)
            assert_array_almost_equal(y1, y2)

            y1 = func(n=n, ap=Ap, x=x, diag=1, trans=2)
            y2 = solve(A.conj().T, x)
            assert_array_almost_equal(y1, y2)
Example #21
    def _solve(v, alpha, cs, ds):
        """Evaluate w = M^-1 v"""
        if len(cs) == 0:
            return v/alpha

        # (B + C D^H)^-1 = B^-1 - B^-1 C (I + D^H B^-1 C)^-1 D^H B^-1

        axpy, dotc = get_blas_funcs(['axpy', 'dotc'], cs[:1] + [v])

        c0 = cs[0]
        A = alpha * np.identity(len(cs), dtype=c0.dtype)
        for i, d in enumerate(ds):
            for j, c in enumerate(cs):
                A[i,j] += dotc(d, c)

        q = np.zeros(len(cs), dtype=c0.dtype)
        for j, d in enumerate(ds):
            q[j] = dotc(d, v)
        q /= alpha
        q = solve(A, q)

        w = v/alpha
        for c, qc in zip(cs, q):
            w = axpy(c, w, w.size, -qc)

        return w
Example #22
def _mixed_norm_solver_bcd(M, G, alpha, lipschitz_constant, maxit=200,
                           tol=1e-8, verbose=None, init=None, n_orient=1,
                           dgap_freq=10):
    """Solve L21 inverse problem with block coordinate descent."""
    n_sensors, n_times = M.shape
    n_sensors, n_sources = G.shape
    n_positions = n_sources // n_orient

    if init is None:
        X = np.zeros((n_sources, n_times))
        R = M.copy()
    else:
        X = init
        R = M - np.dot(G, X)

    E = []  # track primal objective function
    highest_d_obj = - np.inf
    active_set = np.zeros(n_sources, dtype=bool)  # start with full AS

    alpha_lc = alpha / lipschitz_constant

    # First make G fortran for faster access to blocks of columns
    G = np.asfortranarray(G)
    # It is better to call gemm here
    # so it is called only once
    gemm = linalg.get_blas_funcs("gemm", [R.T, G[:, 0:n_orient]])
    one_ovr_lc = 1. / lipschitz_constant

    # assert that all the multiplied matrices are fortran contiguous
    assert X.T.flags.f_contiguous
    assert R.T.flags.f_contiguous
    assert G.flags.f_contiguous
    # storing list of contiguous arrays
    list_G_j_c = []
    for j in range(n_positions):
        idx = slice(j * n_orient, (j + 1) * n_orient)
        list_G_j_c.append(np.ascontiguousarray(G[:, idx]))

    for i in range(maxit):
        _bcd(G, X, R, active_set, one_ovr_lc, n_orient, n_positions,
             alpha_lc, gemm, list_G_j_c)

        if (i + 1) % dgap_freq == 0:
            _, p_obj, d_obj, _ = dgap_l21(M, G, X[active_set], active_set,
                                          alpha, n_orient)
            highest_d_obj = max(d_obj, highest_d_obj)
            gap = p_obj - highest_d_obj
            E.append(p_obj)
            logger.debug("Iteration %d :: p_obj %f :: dgap %f :: n_active %d" %
                         (i + 1, p_obj, gap, np.sum(active_set) / n_orient))

            if gap < tol:
                logger.debug('Convergence reached ! (gap: %s < %s)'
                             % (gap, tol))
                break

    X = X[active_set]

    return X, active_set, E
Example #23
    def _matvec(v, alpha, cs, ds):
        axpy, scal, dotc = get_blas_funcs(['axpy', 'scal', 'dotc'],
                                          cs[:1] + [v])
        w = alpha * v
        for c, d in zip(cs, ds):
            a = dotc(d, v)
            w = axpy(c, w, w.size, a)
        return w
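
A numeric cross-check (illustrative, not from the source): the loop applies M = alpha*I + sum_k c_k d_k^H to v, because dotc(d, v) computes d^H v. Assuming _matvec is reachable as a plain function:

import numpy as np

rng = np.random.default_rng(0)
v = rng.normal(size=4) + 1j * rng.normal(size=4)
cs = [rng.normal(size=4) + 0j for _ in range(2)]
ds = [rng.normal(size=4) + 0j for _ in range(2)]
M = 2.0 * np.eye(4) + sum(np.outer(c, d.conj()) for c, d in zip(cs, ds))
assert np.allclose(_matvec(v, 2.0, cs, ds), M @ v)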
Example #24
    def _calc_lr(self, x, tmp, calc_l=False, A1=None, A2=None, rescale=True,
                 max_itr=1000, rtol=1E-14, atol=1E-14):
        """Power iteration to obtain eigenvector corresponding to largest
           eigenvalue.

           x is modified in place.
        """
        if A1 is None:
            A1 = self.A
        if A2 is None:
            A2 = self.A

        try:
            norm = la.get_blas_funcs("nrm2", [x])
        except (ValueError, AttributeError):
            norm = np.linalg.norm

#        try:
#            allclose = ac.allclose_mat
#        except:
#            allclose = np.allclose
#            print "Falling back to numpy allclose()!"

        n = x.size  # we will scale x so that stuff doesn't get too small

        x *= n / norm(x.ravel())
        tmp[:] = x
        for i in range(max_itr):
            x[:] = tmp
            if calc_l:
                tm.eps_l_noop_inplace(x, A1, A2, tmp)
            else:
                tm.eps_r_noop_inplace(x, A1, A2, tmp)
            ev_mag = norm(tmp.ravel()) / n
            ev = (tmp.mean() / x.mean()).real
            tmp *= (1 / ev_mag)
            if norm((tmp - x).ravel()) < atol + rtol * n:
#            if allclose(tmp, x, rtol, atol):
                #print (i, ev, ev_mag, norm((tmp - x).ravel())/n, atol, rtol)
                x[:] = tmp
                break
#        else:
#            print (i, ev, ev_mag, norm((tmp - x).ravel())/norm(x.ravel()), atol, rtol)

        if rescale and not abs(ev - 1) < atol:
            A1 *= 1 / np.sqrt(ev)
            if self.sanity_checks:
                if A1 is not A2:
                    log.warning("Sanity check failed: Re-scaling with A1 <> A2!")
                if calc_l:
                    tm.eps_l_noop_inplace(x, A1, A2, tmp)
                else:
                    tm.eps_r_noop_inplace(x, A1, A2, tmp)
                ev = tmp.mean() / x.mean()
                if not abs(ev - 1) < atol:
                    log.warning("Sanity check failed: Largest ev after re-scale = %s", ev)

        return x, i < max_itr - 1, i
Example #25
def py_maxvol(A, tol=1.05, max_iters=100):
    """Python implementation of 1-volume maximization.
    For information see :py:func:`maxvol` function."""
    if tol < 1:
        tol = 1.0
    N, r = A.shape
    if N <= r:
        return np.arange(N, dtype=np.int32), np.eye(N, dtype=A.dtype)
    # DGETRF
    B = np.copy(A, order='F')
    C = np.copy(B.T, order='F')
    H, ipiv, info = get_lapack_funcs('getrf', [B])(B, overwrite_a=1)
    # computing pivots from ipiv
    index = np.arange(N, dtype=np.int32)
    for i in range(r):
        tmp = index[i]
        index[i] = index[ipiv[i]]
        index[ipiv[i]] = tmp
    # solve A = CH, H is in LU format
    B = H[:r]
    # It will be much faster to use dtrsm instead of dtrtrs
    trtrs = get_lapack_funcs('trtrs', [B])
    trtrs(B, C, trans=1, lower=0, unitdiag=0, overwrite_b=1)
    trtrs(B, C, trans=1, lower=1, unitdiag=1, overwrite_b=1)
    # C has shape (r, N) -- it is stored transposed
    # find max value in C
    i, j = divmod(abs(C).argmax(), N)
    # use cgeru/zgeru for complex matrices and sger/dger for real ones
    try:
        ger = get_blas_funcs('geru', [C])
    except ValueError:
        ger = get_blas_funcs('ger', [C])
    # set iters to 0
    iters = 0
    # check if need to swap rows
    while abs(C[i, j]) > tol and iters < max_iters:
        # add j to index and recompute C by the Sherman-Morrison formula
        index[i] = j
        tmp_row = C[i].copy()
        tmp_column = C[:, j].copy()
        tmp_column[i] -= 1.
        alpha = -1./C[i, j]
        ger(alpha, tmp_column, tmp_row, a=C, overwrite_a=1)
        iters += 1
        i, j = divmod(abs(C).argmax(), N)
    return index[:r].copy(), C.T
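
A small usage sketch (illustrative, not from the source); it assumes the imports the function relies on (numpy as np, get_lapack_funcs and get_blas_funcs from scipy.linalg) are in scope:

import numpy as np

rng = np.random.RandomState(0)
A = rng.rand(100, 5)
index, C = py_maxvol(A, tol=1.05)
print(index.shape, C.shape)              # (5,) (100, 5)
# defining property: A is reproduced from the selected rows, up to rounding
print(np.allclose(A, C.dot(A[index])))   # True
print(np.abs(C).max())                   # <= tol once the iteration converges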
Example #26
def fast_dot(A, B):
    #  Only try to use fast_dot for older numpy versions.
    #  The related issue has been tackled meanwhile. Also, depending on the
    #  build, the current numpy master's dot can be about 3 times faster.
    if LooseVersion(np.__version__) < '1.7.2':  # backported
        try:
            linalg.get_blas_funcs(['gemm'])
        except (AttributeError, ValueError):
            warnings.warn('Could not import BLAS, falling back to np.dot')
            return np.dot(A, B)

        try:
            return _fast_dot(A, B)
        except ValueError:
            # Maltyped or malformed data.
            return np.dot(A, B)
    else:
        return np.dot(A, B)
Example #27
def norm(x):
    """Compute the Euclidean or Frobenius norm of x.

    Returns the Euclidean norm when x is a vector, the Frobenius norm when x
    is a matrix (2-d array).
    """
    x = np.asarray(x)
    nrm2, = linalg.get_blas_funcs(['nrm2'], [x])
    return nrm2(x)
Example #28
    def get_copula_function(self):
        if self.copula_eval is not None:
            return self.copula_eval

        # 1/sqrt(det R) * exp(-.5 (Phi^-1(us))^T . (R^-1 - I) . (Phi^-1(us)))
        coef = 1.0 / np.sqrt(np.abs(linalg.det(self.R)))
        variance = self.R.getI() - np.identity(self.R.shape[0])
        blas_symv = get_blas_funcs("symv", [variance])
        blas_dot = get_blas_funcs("dot", [variance])

        def eval(*us):
            invphis = ndtri(us)  # faster than norm.ppf(us)
            # sandwich = (invphis * variance).dot(invphis)
            sandwich = blas_dot(invphis, blas_symv(1, variance, invphis))
            return coef * np.exp(-.5 * sandwich)

        self.copula_eval = eval
        return eval
Example #29
def norm(x):
    """Compute the Euclidean or Frobenius norm of x.

    Returns the Euclidean norm when x is a vector, the Frobenius norm when x
    is a matrix (2-d array). More precise than sqrt(squared_norm(x)).
    """
    x = np.asarray(x)
    nrm2, = linalg.get_blas_funcs(["nrm2"], [x])
    return nrm2(x)
Example #30
def dot(A, B):
    """
    Uses BLAS libraries directly to perform dot product
    """
    A, trans_a = _force_forder(A)
    B, trans_b = _force_forder(B)
    gemm_dot = linalg.get_blas_funcs("gemm", arrays=(A, B))

    # gemm is implemented to compute: C = alpha*AB + beta*C
    return gemm_dot(alpha=1.0, a=A, b=B, trans_a=trans_a, trans_b=trans_b)
Example #31
def _fast_dot(A, B):
    if B.shape[0] != A.shape[A.ndim - 1]:  # check adopted from '_dotblas.c'
        raise ValueError

    if A.dtype != B.dtype or any(x.dtype not in (np.float32, np.float64)
                                 for x in [A, B]):
        warnings.warn('Falling back to np.dot. '
                      'Data must be of same type of either '
                      '32 or 64 bit float for the BLAS function, gemm, to be '
                      'used for an efficient dot operation. ',
                      NonBLASDotWarning)
        raise ValueError

    if min(A.shape) == 1 or min(B.shape) == 1 or A.ndim != 2 or B.ndim != 2:
        raise ValueError

    # scipy 0.9 compliant API
    dot = linalg.get_blas_funcs(['gemm'], (A, B))[0]
    A, trans_a = _impose_f_order(A)
    B, trans_b = _impose_f_order(B)
    return dot(alpha=1.0, a=A, b=B, trans_a=trans_a, trans_b=trans_b)
Example #33
def gemm(a, b, alpha=1., **kwargs):
    """
    Wrapper for gemm in scipy.linalg.  Detects which precision to use,
    and alpha (result multiplier) is default 1.0.

    GEMM performs a matrix-matrix multiplication (or matrix-vector)

    C = alpha*op(A)*op(B) + beta*C

    A,B,C are matrices, alpha and beta are scalars
    op(X) is either X or X', depending on whether trans_a or trans_b are 1
    beta and C are optional

    op(A) must be m by k
    op(B) must be k by n
    C, if supplied, must be m by n

    set overwrite_c to 1 to use C's memory for output
    """
    from scipy.linalg import get_blas_funcs
    _gemm, = get_blas_funcs(('gemm',), (a, b))
    return _gemm(alpha, a, b, **kwargs)
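
A short usage sketch (shapes illustrative, not from the source):

import numpy as np

a = np.random.rand(3, 4)
b = np.random.rand(4, 2)
assert np.allclose(gemm(a, b), a @ b)               # C = 1.0 * A @ B
assert np.allclose(gemm(a.T, b, trans_a=1), a @ b)  # op(A) = A'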
Example #34
def fast_dot(A, B):
    """Compute fast dot products directly calling BLAS.

    This function calls BLAS directly while ensuring Fortran contiguity.
    This helps avoid extra copies that `np.dot` would have created.
    For details see section `Linear Algebra on large Arrays`:
    http://wiki.scipy.org/PerformanceTips

    Parameters
    ----------
    A, B: instance of np.ndarray
        input matrices.
    """
    if A.dtype != B.dtype:
        raise ValueError('A and B must be of the same type.')
    if A.dtype not in (np.float32, np.float64):
        raise ValueError('Data must be single or double precision float.')

    dot = linalg.get_blas_funcs('gemm', (A, B))
    A, trans_a = _impose_f_order(A)
    B, trans_b = _impose_f_order(B)
    return dot(alpha=1.0, a=A, b=B, trans_a=trans_a, trans_b=trans_b)
Example #35
    def test_tbsv(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES):
            n = 6
            k = 3
            x = rand(n).astype(dtype)
            A = zeros((n, n), dtype=dtype)
            # Banded upper triangular array
            for sup in range(k + 1):
                A[arange(n - sup), arange(sup, n)] = rand(n - sup)

            # Add complex parts for c,z
            if ind > 1:
                A[nonzero(A)] += 1j * rand((k + 1) * n -
                                           (k * (k + 1) // 2)).astype(dtype)

            # Form the banded storage
            Ab = zeros((k + 1, n), dtype=dtype)
            for row in range(k + 1):
                Ab[-row - 1, row:] = diag(A, k=row)
            func, = get_blas_funcs(('tbsv', ), dtype=dtype)

            y1 = func(k=k, a=Ab, x=x)
            y2 = solve(A, x)
            assert_array_almost_equal(y1, y2)

            y1 = func(k=k, a=Ab, x=x, diag=1)
            A[arange(n), arange(n)] = dtype(1)
            y2 = solve(A, x)
            assert_array_almost_equal(y1, y2)

            y1 = func(k=k, a=Ab, x=x, diag=1, trans=1)
            y2 = solve(A.T, x)
            assert_array_almost_equal(y1, y2)

            y1 = func(k=k, a=Ab, x=x, diag=1, trans=2)
            y2 = solve(A.conj().T, x)
            assert_array_almost_equal(y1, y2)
Example #36
    def test_trmv(self):
        seed(1234)
        for ind, dtype in enumerate(DTYPES):
            n = 3
            A = (rand(n, n) + eye(n)).astype(dtype)
            x = rand(3).astype(dtype)
            func, = get_blas_funcs(('trmv', ), dtype=dtype)

            y1 = func(a=A, x=x)
            y2 = triu(A).dot(x)
            assert_array_almost_equal(y1, y2)

            y1 = func(a=A, x=x, diag=1)
            A[arange(n), arange(n)] = dtype(1)
            y2 = triu(A).dot(x)
            assert_array_almost_equal(y1, y2)

            y1 = func(a=A, x=x, diag=1, trans=1)
            y2 = triu(A).T.dot(x)
            assert_array_almost_equal(y1, y2)

            y1 = func(a=A, x=x, diag=1, trans=2)
            y2 = triu(A).conj().T.dot(x)
            assert_array_almost_equal(y1, y2)
Example #37
def test_get_blas_funcs():
    # check that it returns Fortran code for arrays that are
    # fortran-ordered
    f1, f2, f3 = get_blas_funcs(
        ('axpy', 'axpy', 'axpy'),
        (np.empty((2,2), dtype=np.complex64, order='F'),
         np.empty((2,2), dtype=np.complex128, order='C'))
        )

    # get_blas_funcs will choose libraries depending on most generic
    # array
    assert_equal(f1.typecode, 'z')
    assert_equal(f2.typecode, 'z')
    if cblas is not None:
        assert_equal(f1.module_name, 'cblas')
        assert_equal(f2.module_name, 'cblas')

    # check defaults.
    f1 = get_blas_funcs('rotg')
    assert_equal(f1.typecode, 'd')

    # check also dtype interface
    f1 = get_blas_funcs('gemm', dtype=np.complex64)
    assert_equal(f1.typecode, 'c')
    f1 = get_blas_funcs('gemm', dtype='F')
    assert_equal(f1.typecode, 'c')

    # extended precision complex
    f1 = get_blas_funcs('gemm', dtype=np.clongdouble)
    assert_equal(f1.typecode, 'z')

    # check safe complex upcasting
    f1 = get_blas_funcs('axpy',
                        (np.empty((2,2), dtype=np.float64),
                         np.empty((2,2), dtype=np.complex64))
                        )
    assert_equal(f1.typecode, 'z')
Example #38
def _cholesky_omp(X,
                  y,
                  n_nonzero_coefs,
                  tol=None,
                  copy_X=True,
                  return_path=False):
    """Orthogonal Matching Pursuit step using the Cholesky decomposition.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Input dictionary. Columns are assumed to have unit norm.

    y : array, shape (n_samples,)
        Input targets

    n_nonzero_coefs : int
        Targeted number of non-zero elements

    tol : float
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_X : bool, optional
        Whether the design matrix X must be copied by the algorithm. A false
        value is only helpful if X is already Fortran-ordered, otherwise a
        copy is made anyway.

    return_path : bool, optional. Default: False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    Returns
    -------
    gamma : array, shape (n_nonzero_coefs,)
        Non-zero elements of the solution

    idx : array, shape (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector

    coef : array, shape (n_features, n_nonzero_coefs)
        The first k values of column k correspond to the coefficient value
        for the active features at that step. The lower left triangle contains
        garbage. Only returned if ``return_path=True``.

    n_active : int
        Number of active features at convergence.
    """
    if copy_X:
        X = X.copy('F')
    else:  # even if we are allowed to overwrite, still copy it if bad order
        X = np.asfortranarray(X)

    min_float = np.finfo(X.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X, ))
    potrs, = get_lapack_funcs(('potrs', ), (X, ))

    alpha = np.dot(X.T, y)
    residual = y
    gamma = np.empty(0)
    n_active = 0
    indices = np.arange(X.shape[1])  # keeping track of swapping

    max_features = X.shape[1] if tol is not None else n_nonzero_coefs

    L = np.empty((max_features, max_features), dtype=X.dtype)

    if return_path:
        coefs = np.empty_like(L)

    while True:
        lam = np.argmax(np.abs(np.dot(X.T, residual)))
        if lam < n_active or alpha[lam]**2 < min_float:
            # atom already selected or inner product too small
            warnings.warn(premature, RuntimeWarning, stacklevel=2)
            break

        if n_active > 0:
            # Updates the Cholesky decomposition of X' X
            L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
            linalg.solve_triangular(L[:n_active, :n_active],
                                    L[n_active, :n_active],
                                    trans=0,
                                    lower=1,
                                    overwrite_b=True,
                                    check_finite=False)
            v = nrm2(L[n_active, :n_active])**2
            Lkk = linalg.norm(X[:, lam])**2 - v
            if Lkk <= min_float:  # selected atoms are dependent
                warnings.warn(premature, RuntimeWarning, stacklevel=2)
                break
            L[n_active, n_active] = sqrt(Lkk)
        else:
            L[0, 0] = linalg.norm(X[:, lam])

        X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
        alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        n_active += 1

        # solves LL'x = X'y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active],
                         alpha[:n_active],
                         lower=True,
                         overwrite_b=False)

        if return_path:
            coefs[:n_active, n_active - 1] = gamma
        residual = y - np.dot(X[:, :n_active], gamma)
        if tol is not None and nrm2(residual)**2 <= tol:
            break
        elif n_active == max_features:
            break

    if return_path:
        return gamma, indices[:n_active], coefs[:, :n_active], n_active
    else:
        return gamma, indices[:n_active], n_active
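
A hedged driver for the routine above (data illustrative; it assumes the module-level names this function relies on -- np, linalg, get_lapack_funcs, sqrt, warnings, and the `premature` warning message sklearn defines alongside it -- are in scope):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 20)
X /= np.linalg.norm(X, axis=0)  # unit-norm columns, as the docstring assumes
w = np.zeros(20)
w[[2, 7, 11]] = [1.0, -2.0, 0.5]
y = X.dot(w)

gamma, idx, n_active = _cholesky_omp(X, y, n_nonzero_coefs=3)
print(sorted(idx))  # typically recovers [2, 7, 11] on this easy instance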
Example #39
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True):
    """Orthogonal Matching Pursuit step using the Cholesky decomposition.

    Parameters:
    -----------
    X: array, shape = (n_samples, n_features)
        Input dictionary. Columns are assumed to have unit norm.

    y: array, shape = (n_samples,)
        Input targets

    n_nonzero_coefs: int
        Targeted number of non-zero elements

    tol: float
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_X: bool, optional
        Whether the design matrix X must be copied by the algorithm. A false
        value is only helpful if X is already Fortran-ordered, otherwise a
        copy is made anyway.

    Returns:
    --------
    gamma: array, shape = (n_nonzero_coefs,)
        Non-zero elements of the solution

    idx: array, shape = (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector

    """
    if copy_X:
        X = X.copy('F')
    else:  # even if we are allowed to overwrite, still copy it if bad order
        X = np.asfortranarray(X)

    min_float = np.finfo(X.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X, ))
    potrs, = get_lapack_funcs(('potrs', ), (X, ))

    alpha = np.dot(X.T, y)
    residual = y
    gamma = np.empty(0)
    n_active = 0
    indices = np.arange(X.shape[1])  # keeping track of swapping

    max_features = X.shape[1] if tol is not None else n_nonzero_coefs
    L = np.empty((max_features, max_features), dtype=X.dtype)
    L[0, 0] = 1.

    while True:
        lam = np.argmax(np.abs(np.dot(X.T, residual)))
        if lam < n_active or alpha[lam]**2 < min_float:
            # atom already selected or inner product too small
            warn(premature)
            break
        if n_active > 0:
            # Updates the Cholesky decomposition of X' X
            L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
            solve_triangular(L[:n_active, :n_active], L[n_active, :n_active])
            v = nrm2(L[n_active, :n_active])**2
            if 1 - v <= min_float:  # selected atoms are dependent
                warn(premature)
                break
            L[n_active, n_active] = np.sqrt(1 - v)
        X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
        alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        n_active += 1
        # solves LL'x = y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active],
                         alpha[:n_active],
                         lower=True,
                         overwrite_b=False)

        residual = y - np.dot(X[:, :n_active], gamma)
        if tol is not None and nrm2(residual)**2 <= tol:
            break
        elif n_active == max_features:
            break

    return gamma, indices[:n_active]
Example #40
__author__ = "Nikolay Bryskin"  # https://github.com/nikicat

import numpy as np
import sys
from collections import deque
from scipy.linalg import get_blas_funcs

gemm = get_blas_funcs('gemm')

#np.dot = lambda a, b: gemm(1, a, b)

OFFSET = 3000
SKIP_SIZE = 27
N_CHANNELS = 21

try:
    profile(lambda: 1)
except NameError:

    def profile(func):
        return func


class DelayedRLSPredictor:
    def __init__(self,
                 n_channels,
                 target_channel,
                 M=3,
                 lambda_=0.999,
                 delta=100,
                 delay=0,
Example #41
class FullHessianUpdateStrategy(HessianUpdateStrategy):
    """Hessian update strategy with full dimensional internal representation.
    """
    _syr = get_blas_funcs('syr', dtype='d')  # Symmetric rank 1 update
    _syr2 = get_blas_funcs('syr2', dtype='d')  # Symmetric rank 2 update
    # Symmetric matrix-vector product
    _symv = get_blas_funcs('symv', dtype='d')

    def __init__(self, init_scale='auto'):
        self.init_scale = init_scale
        # Until initialize is called we can't really use the class,
        # so it makes sense to set everything to None.
        self.first_iteration = None
        self.approx_type = None
        self.B = None
        self.H = None

    def initialize(self, n, approx_type):
        """Initialize internal matrix.

        Allocate internal memory for storing and updating
        the Hessian or its inverse.

        Parameters
        ----------
        n : int
            Problem dimension.
        approx_type : {'hess', 'inv_hess'}
            Selects either the Hessian or the inverse Hessian.
            When set to 'hess' the Hessian will be stored and updated.
            When set to 'inv_hess' its inverse will be used instead.
        """
        self.first_iteration = True
        self.n = n
        self.approx_type = approx_type
        if approx_type not in ('hess', 'inv_hess'):
            raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")
        # Create matrix
        if self.approx_type == 'hess':
            self.B = np.eye(n, dtype=float)
        else:
            self.H = np.eye(n, dtype=float)

    def _auto_scale(self, delta_x, delta_grad):
        # Heuristic to scale matrix at first iteration.
        # Described in Nocedal and Wright "Numerical Optimization"
        # p.143 formula (6.20).
        s_norm2 = np.dot(delta_x, delta_x)
        y_norm2 = np.dot(delta_grad, delta_grad)
        ys = np.abs(np.dot(delta_grad, delta_x))
        if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:
            return 1
        if self.approx_type == 'hess':
            return y_norm2 / ys
        else:
            return ys / y_norm2

    def _update_implementation(self, delta_x, delta_grad):
        raise NotImplementedError("The method ``_update_implementation``"
                                  " is not implemented.")

    def update(self, delta_x, delta_grad):
        """Update internal matrix.

        Update Hessian matrix or its inverse (depending on how 'approx_type'
        is defined) using information about the last evaluated points.

        Parameters
        ----------
        delta_x : ndarray
            The difference between two points the gradient
            function have been evaluated at: ``delta_x = x2 - x1``.
        delta_grad : ndarray
            The difference between the gradients:
            ``delta_grad = grad(x2) - grad(x1)``.
        """
        if np.all(delta_x == 0.0):
            return
        if np.all(delta_grad == 0.0):
            warn('delta_grad == 0.0. Check if the approximated '
                 'function is linear. If the function is linear '
                 'better results can be obtained by defining the '
                 'Hessian as zero instead of using quasi-Newton '
                 'approximations.', UserWarning)
            return
        if self.first_iteration:
            # Get user specific scale
            if self.init_scale == "auto":
                scale = self._auto_scale(delta_x, delta_grad)
            else:
                scale = float(self.init_scale)
            # Scale initial matrix with ``scale * np.eye(n)``
            if self.approx_type == 'hess':
                self.B *= scale
            else:
                self.H *= scale
            self.first_iteration = False
        self._update_implementation(delta_x, delta_grad)

    def dot(self, p):
        """Compute the product of the internal matrix with the given vector.

        Parameters
        ----------
        p : array_like
            1-d array representing a vector.

        Returns
        -------
        Hp : array
            1-d  represents the result of multiplying the approximation matrix
            by vector p.
        """
        if self.approx_type == 'hess':
            return self._symv(1, self.B, p)
        else:
            return self._symv(1, self.H, p)

    def get_matrix(self):
        """Return the current internal matrix.

        Returns
        -------
        M : ndarray, shape (n, n)
            Dense matrix containing either the Hessian or its inverse
            (depending on how `approx_type` was defined).
        """
        if self.approx_type == 'hess':
            M = np.copy(self.B)
        else:
            M = np.copy(self.H)
        li = np.tril_indices_from(M, k=-1)
        M[li] = M.T[li]
        return M
Example #42
from __future__ import division
import numpy as np
import math
import scipy.linalg as linalg
from Cholesky import Cholesky
import random
trm = linalg.get_blas_funcs('trmm')
from scipy.special import gammaln as gamlog


class Gauss_Wishart_model:
    '''Probability distributions for a Bayesian Normal Wishart probability model.'''
    def __init__(self, Gaussian_component):
        self.GaussComp = Gaussian_component
        self.GaussComp._Gaussian_component__update_params(scale=1,
                                                          prec=1,
                                                          XX_T=1)
        self.d = self.GaussComp.d
        self.n = self.GaussComp.n
        self.prec_mu_norm_Z = None  # normalizing constant of the prior probability
        self.prec_norm_Z = None  # normalizing constant of the prior probability of the precision matrix
        self.mu_norm_Z = None  # normalizing constant of the prior probability of the mean

        self.prior_lp = None  # log prior probability of the mean and precision matrix
        self.prior_lp_prec = None  # log prior probability of the precision matrix
        self.prior_lp_mu = None  # log prior probability of the mean

        self.data_lp = None  # log probability of the data

        self.joint_lp = None  # joint log probability (data, mean, precision)
Example #43
def gmres_householder(A,
                      b,
                      x0=None,
                      tol=1e-5,
                      restrt=None,
                      maxiter=None,
                      xtype=None,
                      M=None,
                      callback=None,
                      residuals=None):
    '''
    Generalized Minimum Residual Method (GMRES)
        GMRES iteratively refines the initial solution guess to the
        system Ax = b
        Householder reflections are used for orthogonalization

    Parameters
    ----------
    A : {array, matrix, sparse matrix, LinearOperator}
        n x n, linear system to solve
    b : {array, matrix}
        right hand side, shape is (n,) or (n, 1)
    x0 : {array, matrix}
        initial guess, default is a vector of zeros
    tol : float
        relative convergence tolerance, i.e. tol is scaled by the norm
        of the initial preconditioned residual
    restrt : {None, int}
        - if int, restrt is max number of inner iterations
          and maxiter is the max number of outer iterations
        - if None, do not restart GMRES, and max number of inner iterations
          is maxiter
    maxiter : {None, int}
        - if restrt is None, maxiter is the max number of inner iterations
          and GMRES does not restart
        - if restrt is int, maxiter is the max number of outer iterations,
          and restrt is the max number of inner iterations
    xtype : type
        dtype for the solution, default is automatic type detection
    M : {array, matrix, sparse matrix, LinearOperator}
        n x n, inverted preconditioner, i.e. solve M A x = M b.
    callback : function
        User-supplied function is called after each iteration as
        callback( ||rk||_2 ), where rk is the current preconditioned residual
        vector
    residuals : list
        residuals contains the preconditioned residual norm history,
        including the initial residual.

    Returns
    -------
    (xNew, info)
    xNew : an updated guess to the solution of Ax = b
    info : halting status of gmres

            ==  =============================================
            0   successful exit
            >0  convergence to tolerance not achieved,
                return iteration count instead.  This value
                is precisely the order of the Krylov space.
            <0  numerical breakdown, or illegal input
            ==  =============================================

    Notes
    -----
        - The LinearOperator class is in scipy.sparse.linalg.interface.
          Use this class if you prefer to define A or M as a mat-vec routine
          as opposed to explicitly constructing the matrix.  A.psolve(..) is
          still supported as a legacy.
        - For robustness, Householder reflections are used to orthonormalize
          the Krylov Space
          Givens Rotations are used to provide the residual norm each iteration

    Examples
    --------
    >>> from pyamg.krylov import gmres
    >>> from pyamg.util.linalg import norm
    >>> import numpy
    >>> from pyamg.gallery import poisson
    >>> A = poisson((10, 10))
    >>> b = numpy.ones((A.shape[0],))
    >>> (x, flag) = gmres(A, b, maxiter=2, tol=1e-8, orthog='householder')
    >>> print(norm(b - A*x))
    6.5428213057

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html

    '''
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b, xtype)
    dimen = A.shape[0]

    ##
    # Ensure that warnings are always reissued from this function
    import warnings
    warnings.filterwarnings('always',
                            module=r'pyamg\.krylov\._gmres_householder')

    # Choose type
    if not hasattr(A, 'dtype'):
        Atype = upcast(x.dtype, b.dtype)
    else:
        Atype = A.dtype
    if not hasattr(M, 'dtype'):
        Mtype = upcast(x.dtype, b.dtype)
    else:
        Mtype = M.dtype
    xtype = upcast(Atype, x.dtype, b.dtype, Mtype)

    # known bug vvvvvv remove when fixed
    if Atype == complex:
        raise ValueError('[Known Bug] Householder fails with complex matrices; \
                          use MGS')

    if restrt is not None:
        restrt = int(restrt)
    if maxiter is not None:
        maxiter = int(maxiter)

    # Should norm(r) be kept
    if residuals == []:
        keep_r = True
    else:
        keep_r = False

    # Set number of outer and inner iterations
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > dimen:
            warn('Setting number of inner iterations (restrt) to maximum \
                  allowed, which is A.shape[0] ')
            restrt = dimen
        max_inner = restrt
    else:
        max_outer = 1
        if maxiter is None:
            maxiter = min(dimen, 40)
        elif maxiter > dimen:
            warn('Setting number of inner iterations (maxiter) to maximum \
                  allowed, which is A.shape[0] ')
            maxiter = dimen
        max_inner = maxiter

    # Get fast access to underlying BLAS routines
    [rotg] = get_blas_funcs(['rotg'], [x])

    # Is this a one dimensional matrix?
    if dimen == 1:
        entry = ravel(A * array([1.0], dtype=xtype))
        return (postprocess(b / entry), 0)

    # Prep for method
    r = b - ravel(A * x)

    #Apply preconditioner
    r = ravel(M * r)
    normr = norm(r)
    if keep_r:
        residuals.append(normr)
    ## Check for nan, inf
    #if isnan(r).any() or isinf(r).any():
    #    warn('inf or nan after application of preconditioner')
    #    return(postprocess(x), -1)

    # Check initial guess ( scaling by b, if b != 0,
    #   must account for case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol * normb:
        if callback is not None:
            callback(norm(r))
        return (postprocess(x), 0)

    # Scale tol by ||r_0||_2, we use the preconditioned residual
    # because this is left preconditioned GMRES.
    if normr != 0.0:
        tol = tol * normr

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_outer + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tol.
    niter = 0

    # Begin GMRES
    for outer in range(max_outer):

        # Calculate vector w, which defines the Householder reflector
        #    Take shortcut in calculating,
        #    w = r + sign(r[1])*||r||_2*e_1
        w = r
        beta = mysign(w[0]) * normr
        w[0] = w[0] + beta
        w[:] = w / norm(w)

        # Preallocate for Krylov vectors, Householder reflectors and
        # Hessenberg matrix
        # Space required is O(dimen*max_inner)
        # Givens Rotations
        Q = zeros((4 * max_inner, ), dtype=xtype)
        # upper Hessenberg matrix (made upper tri with Givens Rotations)
        H = zeros((max_inner, max_inner), dtype=xtype)
        # Householder reflectors
        W = zeros((max_inner + 1, dimen), dtype=xtype)
        W[0, :] = w

        # Multiply r with (I - 2*w*w.T), i.e. apply the Householder reflector
        # This is the RHS vector for the problem in the Krylov Space
        g = zeros((dimen, ), dtype=xtype)
        g[0] = -beta

        for inner in range(max_inner):
            # Calculate Krylov vector in two steps
            # (1) Calculate v = P_j = (I - 2*w*w.T)v, where k = inner
            v = -2.0 * conjugate(w[inner]) * w
            v[inner] = v[inner] + 1.0
            # (2) Calculate the rest, v = P_1*P_2*P_3...P_{j-1}*ej.
            #for j in range(inner-1,-1,-1):
            #    v -= 2.0*dot(conjugate(W[j,:]), v)*W[j,:]
            amg_core.apply_householders(v, ravel(W), dimen, inner - 1, -1, -1)

            # Calculate new search direction
            v = ravel(A * v)

            #Apply preconditioner
            v = ravel(M * v)
            ## Check for nan, inf
            #if isnan(v).any() or isinf(v).any():
            #    warn('inf or nan after application of preconditioner')
            #    return(postprocess(x), -1)

            # Factor in all Householder orthogonal reflections on new search
            # direction
            #for j in range(inner+1):
            #    v -= 2.0*dot(conjugate(W[j,:]), v)*W[j,:]
            amg_core.apply_householders(v, ravel(W), dimen, 0, inner + 1, 1)

            # Calculate next Householder reflector, w
            #  w = v[inner+1:] + sign(v[inner+1])*||v[inner+1:]||_2*e_{inner+1}
            #  Note that if max_inner = dimen, then this is unnecessary for the
            #  last inner iteration, when inner = dimen-1.  Here we do not need
            #  to calculate a Householder reflector or Givens rotation because
            #  nnz(v) is already the desired length, i.e. we do not need to
            #  zero anything out.
            if inner != dimen - 1:
                if inner < (max_inner - 1):
                    w = W[inner + 1, :]
                vslice = v[inner + 1:]
                alpha = norm(vslice)
                if alpha != 0:
                    alpha = mysign(vslice[0]) * alpha
                    # do not need the final reflector for future calculations
                    if inner < (max_inner - 1):
                        w[inner + 1:] = vslice
                        w[inner + 1] += alpha
                        w[:] = w / norm(w)

                    # Apply new reflector to v
                    #  v = v - 2.0*w*(w.T*v)
                    v[inner + 1] = -alpha
                    v[inner + 2:] = 0.0

            if inner > 0:
                # Apply all previous Givens Rotations to v
                amg_core.apply_givens(Q, v, dimen, inner)

            # Calculate the next Givens rotation, where j = inner Note that if
            # max_inner = dimen, then this is unnecessary for the last inner
            # iteration, when inner = dimen-1.  Here we do not need to
            # calculate a Householder reflector or Givens rotation because
            # nnz(v) is already the desired length, i.e. we do not need to zero
            # anything out.
            if inner != dimen - 1:
                if v[inner + 1] != 0:
                    [c, s] = rotg(v[inner], v[inner + 1])
                    Qblock = array([[c, s], [-conjugate(s), c]], dtype=xtype)
                    Q[(inner * 4):((inner + 1) * 4)] = ravel(Qblock).copy()

                    # Apply Givens Rotation to g, the RHS for the linear system
                    # in the Krylov Subspace.  Note that this dot does a matrix
                    # multiply, not an actual dot product where a conjugate
                    # transpose is taken
                    g[inner:inner + 2] = dot(Qblock, g[inner:inner + 2])

                    # Apply effect of Givens Rotation to v
                    v[inner] = dot(Qblock[0, :], v[inner:inner + 2])
                    v[inner + 1] = 0.0

            # Write to upper Hessenberg Matrix,
            #   the LHS for the linear system in the Krylov Subspace
            H[:, inner] = v[0:max_inner]

            niter += 1

            # Don't update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner - 1:
                normr = abs(g[inner + 1])
                if normr < tol:
                    break

                # Allow user access to residual
                if callback is not None:
                    callback(normr)
                if keep_r:
                    residuals.append(normr)

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space, V.  Solve the projected
        # (inner+1) x (inner+1) upper triangular system for y.
        #piv = arange(inner+1)
        #y = lu_solve((H[0:(inner+1), 0:(inner+1)], piv), g[0:(inner+1)],
        #             trans=0)
        y = scipy.linalg.solve(H[0:(inner + 1), 0:(inner + 1)],
                               g[0:(inner + 1)])

        # Use Horner like Scheme to map solution, y, back to original space.
        # Note that we do not use the last reflector.
        update = zeros(x.shape, dtype=xtype)
        #for j in range(inner,-1,-1):
        #    update[j] += y[j]
        #    # Apply j-th reflector, (I - 2.0*w_j*w_j.T)*upadate
        #    update -= 2.0*dot(conjugate(W[j,:]), update)*W[j,:]
        amg_core.householder_hornerscheme(update, ravel(W), ravel(y), dimen,
                                          inner, -1, -1)

        x[:] = x + update
        r = b - ravel(A * x)

        # Apply preconditioner
        r = ravel(M * r)
        normr = norm(r)
        ## Check for nan, inf
        #if isnan(r).any() or isinf(r).any():
        #    warn('inf or nan after application of preconditioner')
        #    return(postprocess(x), -1)

        # Allow user access to residual
        if callback is not None:
            callback(normr)
        if keep_r:
            residuals.append(normr)

        # Has GMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = max(abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
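
A minimal sketch (assuming only NumPy and SciPy) of the Givens rotation step
used above: ``rotg`` returns ``(c, s)`` such that the 2x2 rotation zeroes the
second component of a vector, which is how the Hessenberg matrix is reduced
to triangular form one entry at a time.

import numpy as np
from scipy.linalg import get_blas_funcs

v = np.array([3.0, 4.0])
[rotg] = get_blas_funcs(['rotg'], [v])
c, s = rotg(v[0], v[1])
G = np.array([[c, s], [-np.conjugate(s), c]])
# G.dot(v) is approximately [5, 0]: the second entry is zeroed and the
# 2-norm of v survives in the first entry
print(G.dot(v))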
Ejemplo n.º 44
0
def gcrotmk(A,
            b,
            x0=None,
            tol=1e-5,
            maxiter=1000,
            M=None,
            callback=None,
            m=20,
            k=None,
            CU=None,
            discard_C=False,
            truncate='oldest',
            atol=None):
    """
    Solve a matrix equation using flexible GCROT(m,k) algorithm.

    Parameters
    ----------
    A : {sparse matrix, ndarray, LinearOperator}
        The real or complex N-by-N matrix of the linear system.
        Alternatively, ``A`` can be a linear operator which can
        produce ``Ax`` using, e.g.,
        ``scipy.sparse.linalg.LinearOperator``.
    b : ndarray
        Right hand side of the linear system. Has shape (N,) or (N,1).
    x0 : ndarray
        Starting guess for the solution.
    tol, atol : float, optional
        Tolerances for convergence, ``norm(residual) <= max(tol*norm(b), atol)``.
        The default for ``atol`` is `tol`.

        .. warning::

           The default value for `atol` will be changed in a future release.
           For future compatibility, specify `atol` explicitly.
    maxiter : int, optional
        Maximum number of iterations.  Iteration will stop after maxiter
        steps even if the specified tolerance has not been achieved.
    M : {sparse matrix, ndarray, LinearOperator}, optional
        Preconditioner for A.  The preconditioner should approximate the
        inverse of A. gcrotmk is a 'flexible' algorithm and the preconditioner
        can vary from iteration to iteration. Effective preconditioning
        dramatically improves the rate of convergence, which implies that
        fewer iterations are needed to reach a given error tolerance.
    callback : function, optional
        User-supplied function to call after each iteration.  It is called
        as callback(xk), where xk is the current solution vector.
    m : int, optional
        Number of inner FGMRES iterations per each outer iteration.
        Default: 20
    k : int, optional
        Number of vectors to carry between inner FGMRES iterations.
        According to [2]_, good values are around m.
        Default: m
    CU : list of tuples, optional
        List of tuples ``(c, u)`` which contain the columns of the matrices
        C and U in the GCROT(m,k) algorithm. For details, see [2]_.
        The list given and vectors contained in it are modified in-place.
        If not given, start from empty matrices. The ``c`` elements in the
        tuples can be ``None``, in which case the vectors are recomputed
        via ``c = A u`` on start and orthogonalized as described in [3]_.
    discard_C : bool, optional
        Discard the C-vectors at the end. Useful if recycling Krylov subspaces
        for different linear systems.
    truncate : {'oldest', 'smallest'}, optional
        Truncation scheme to use. Drop: oldest vectors, or vectors with
        smallest singular values using the scheme discussed in [1,2].
        See [2]_ for detailed comparison.
        Default: 'oldest'

    Returns
    -------
    x : ndarray
        The solution found.
    info : int
        Provides convergence information:

        * 0  : successful exit
        * >0 : convergence to tolerance not achieved, number of iterations

    References
    ----------
    .. [1] E. de Sturler, ''Truncation strategies for optimal Krylov subspace
           methods'', SIAM J. Numer. Anal. 36, 864 (1999).
    .. [2] J.E. Hicken and D.W. Zingg, ''A simplified and flexible variant
           of GCROT for solving nonsymmetric linear systems'',
           SIAM J. Sci. Comput. 32, 172 (2010).
    .. [3] M.L. Parks, E. de Sturler, G. Mackey, D.D. Johnson, S. Maiti,
           ''Recycling Krylov subspaces for sequences of linear systems'',
           SIAM J. Sci. Comput. 28, 1651 (2006).

    """
    A, M, x, b, postprocess = make_system(A, M, x0, b)

    if not np.isfinite(b).all():
        raise ValueError("RHS must contain only finite numbers")

    if truncate not in ('oldest', 'smallest'):
        raise ValueError("Invalid value for 'truncate': %r" % (truncate, ))

    if atol is None:
        warnings.warn(
            "scipy.sparse.linalg.gcrotmk called without specifying `atol`. "
            "The default value will change in the future. To preserve "
            "current behavior, set ``atol=tol``.",
            category=DeprecationWarning,
            stacklevel=2)
        atol = tol

    matvec = A.matvec
    psolve = M.matvec

    if CU is None:
        CU = []

    if k is None:
        k = m

    axpy, dot, scal = None, None, None

    r = b - matvec(x)

    axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'],
                                           (x, r))

    b_norm = nrm2(b)
    if b_norm == 0:
        x = b
        return (postprocess(x), 0)

    if discard_C:
        CU[:] = [(None, u) for c, u in CU]

    # Reorthogonalize old vectors
    if CU:
        # Sort already existing vectors to the front
        CU.sort(key=lambda cu: cu[0] is not None)

        # Fill-in missing ones
        C = np.empty((A.shape[0], len(CU)), dtype=r.dtype, order='F')
        us = []
        j = 0
        while CU:
            # More memory-efficient: throw away old vectors as we go
            c, u = CU.pop(0)
            if c is None:
                c = matvec(u)
            C[:, j] = c
            j += 1
            us.append(u)

        # Orthogonalize
        Q, R, P = qr(C, overwrite_a=True, mode='economic', pivoting=True)
        del C

        # C := Q
        cs = list(Q.T)

        # U := U P R^-1,  back-substitution
        new_us = []
        for j in range(len(cs)):
            u = us[P[j]]
            for i in range(j):
                u = axpy(us[P[i]], u, u.shape[0], -R[i, j])
            if abs(R[j, j]) < 1e-12 * abs(R[0, 0]):
                # discard rest of the vectors
                break
            u = scal(1.0 / R[j, j], u)
            new_us.append(u)

        # Form the new CU lists
        CU[:] = list(zip(cs, new_us))[::-1]

    if CU:
        axpy, dot = get_blas_funcs(['axpy', 'dot'], (r, ))

        # Solve first the projection operation with respect to the CU
        # vectors. This corresponds to modifying the initial guess to
        # be
        #
        #     x' = x + U y
        #     y = argmin_y || b - A (x + U y) ||^2
        #
        # The solution is y = C^H (b - A x)
        for c, u in CU:
            yc = dot(c, r)
            x = axpy(u, x, x.shape[0], yc)
            r = axpy(c, r, r.shape[0], -yc)

    # GCROT main iteration
    for j_outer in range(maxiter):
        # -- callback
        if callback is not None:
            callback(x)

        beta = nrm2(r)

        # -- check stopping condition
        beta_tol = max(atol, tol * b_norm)

        if beta <= beta_tol and (j_outer > 0 or CU):
            # recompute residual to avoid rounding error
            r = b - matvec(x)
            beta = nrm2(r)

        if beta <= beta_tol:
            j_outer = -1
            break

        ml = m + max(k - len(CU), 0)

        cs = [c for c, u in CU]

        try:
            Q, R, B, vs, zs, y, pres = _fgmres(matvec,
                                               r / beta,
                                               ml,
                                               rpsolve=psolve,
                                               atol=max(atol, tol * b_norm) /
                                               beta,
                                               cs=cs)
            y *= beta
        except LinAlgError:
            # Floating point over/underflow, non-finite result from
            # matmul etc. -- report failure.
            break

        #
        # At this point,
        #
        #     [A U, A Z] = [C, V] G;   G =  [ I  B ]
        #                                   [ 0  H ]
        #
        # where [C, V] has orthonormal columns, and r = beta v_0. Moreover,
        #
        #     || b - A (x + Z y + U q) ||_2 = || r - C B y - V H y - C q ||_2 = min!
        #
        # from which y = argmin_y || beta e_1 - H y ||_2, and q = -B y
        #

        #
        # GCROT(m,k) update
        #

        # Define new outer vectors

        # ux := (Z - U B) y
        ux = zs[0] * y[0]
        for z, yc in zip(zs[1:], y[1:]):
            ux = axpy(z, ux, ux.shape[0], yc)  # ux += z*yc
        by = B.dot(y)
        for cu, byc in zip(CU, by):
            c, u = cu
            ux = axpy(u, ux, ux.shape[0], -byc)  # ux -= u*byc

        # cx := V H y
        hy = Q.dot(R.dot(y))
        cx = vs[0] * hy[0]
        for v, hyc in zip(vs[1:], hy[1:]):
            cx = axpy(v, cx, cx.shape[0], hyc)  # cx += v*hyc

        # Normalize cx, maintaining cx = A ux
        # This new cx is orthogonal to the previous C, by construction
        try:
            alpha = 1 / nrm2(cx)
            if not np.isfinite(alpha):
                raise FloatingPointError()
        except (FloatingPointError, ZeroDivisionError):
            # Cannot update, so skip it
            continue

        cx = scal(alpha, cx)
        ux = scal(alpha, ux)

        # Update residual and solution
        gamma = dot(cx, r)
        r = axpy(cx, r, r.shape[0], -gamma)  # r -= gamma*cx
        x = axpy(ux, x, x.shape[0], gamma)  # x += gamma*ux

        # Truncate CU
        if truncate == 'oldest':
            while len(CU) >= k and CU:
                del CU[0]
        elif truncate == 'smallest':
            if len(CU) >= k and CU:
                # cf. [1,2]
                D = solve(R[:-1, :].T, B.T).T
                W, sigma, V = svd(D)

                # C := C W[:,:k-1],  U := U W[:,:k-1]
                new_CU = []
                for j, w in enumerate(W[:, :k - 1].T):
                    c, u = CU[0]
                    c = c * w[0]
                    u = u * w[0]
                    for cup, wp in zip(CU[1:], w[1:]):
                        cp, up = cup
                        c = axpy(cp, c, c.shape[0], wp)
                        u = axpy(up, u, u.shape[0], wp)

                    # Reorthogonalize at the same time; not necessary
                    # in exact arithmetic, but floating point error
                    # tends to accumulate here
                    for cp, up in new_CU:
                        alpha = dot(cp, c)
                        c = axpy(cp, c, c.shape[0], -alpha)
                        u = axpy(up, u, u.shape[0], -alpha)
                    alpha = nrm2(c)
                    c = scal(1.0 / alpha, c)
                    u = scal(1.0 / alpha, u)

                    new_CU.append((c, u))
                CU[:] = new_CU

        # Add new vector to CU
        CU.append((cx, ux))

    # Include the solution vector to the span
    CU.append((None, x.copy()))
    if discard_C:
        CU[:] = [(None, uz) for cz, uz in CU]

    return postprocess(x), j_outer + 1
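
A hypothetical usage sketch, assuming the routine above is
``scipy.sparse.linalg.gcrotmk`` with the signature shown; the tridiagonal
system and tolerances below are illustrative only.

import numpy as np
from scipy.sparse import diags
from scipy.sparse.linalg import gcrotmk

n = 100
A = diags([-1.0, 2.0, -1.0], [-1, 0, 1], shape=(n, n), format='csr')
b = np.ones(n)
x, info = gcrotmk(A, b, tol=1e-8, atol=1e-8)  # info == 0 signals convergence
print(info, np.linalg.norm(b - A @ x))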
Ejemplo n.º 45
0
def gmres_mgs(A,
              b,
              x0=None,
              tol=1e-5,
              restrt=None,
              maxiter=None,
              xtype=None,
              M=None,
              callback=None,
              residuals=None,
              reorth=False):
    '''
    Generalized Minimum Residual Method (GMRES)
        GMRES iteratively refines the initial solution guess to the system
        Ax = b
        Modified Gram-Schmidt version

    Parameters
    ----------
    A : {array, matrix, sparse matrix, LinearOperator}
        n x n, linear system to solve
    b : {array, matrix}
        right hand side, shape is (n,) or (n,1)
    x0 : {array, matrix}
        initial guess, default is a vector of zeros
    tol : float
        relative convergence tolerance, i.e. tol is scaled by the norm
        of the initial preconditioned residual
    restrt : {None, int}
        - if int, restrt is max number of inner iterations
          and maxiter is the max number of outer iterations
        - if None, do not restart GMRES, and max number of inner iterations
          is maxiter
    maxiter : {None, int}
        - if restrt is None, maxiter is the max number of inner iterations
          and GMRES does not restart
        - if restrt is int, maxiter is the max number of outer iterations,
          and restrt is the max number of inner iterations
    xtype : type
        dtype for the solution, default is automatic type detection
    M : {array, matrix, sparse matrix, LinearOperator}
        n x n, inverted preconditioner, i.e. solve M A x = M b.
    callback : function
        User-supplied function is called after each iteration as
        callback(xk), where xk is the current solution vector
    residuals : list
        residuals contains the preconditioned residual norm history,
        including the initial residual.
    reorth : boolean
        If True, then a check is made whether to re-orthogonalize the Krylov
        space each GMRES iteration

    Returns
    -------
    (xNew, info)
    xNew : an updated guess to the solution of Ax = b
    info : halting status of gmres

            ==  =============================================
            0   successful exit
            >0  convergence to tolerance not achieved,
                return iteration count instead.  This value
                is precisely the order of the Krylov space.
            <0  numerical breakdown, or illegal input
            ==  =============================================

    Notes
    -----
        - The LinearOperator class is in scipy.sparse.linalg.interface.
          Use this class if you prefer to define A or M as a mat-vec routine
          as opposed to explicitly constructing the matrix.  A.psolve(..) is
          still supported as a legacy.
        - For robustness, modified Gram-Schmidt is used to orthogonalize
          the Krylov space.  Givens rotations are used to provide the
          residual norm at each iteration.

    Examples
    --------
    >>> from pyamg.krylov import gmres
    >>> from pyamg.util.linalg import norm
    >>> import numpy as np
    >>> from pyamg.gallery import poisson
    >>> A = poisson((10,10))
    >>> b = np.ones((A.shape[0],))
    >>> (x,flag) = gmres(A,b, maxiter=2, tol=1e-8, orthog='mgs')
    >>> print(norm(b - A*x))
    6.5428213057

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html

    .. [2] C. T. Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html
    '''
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b, xtype)
    dimen = A.shape[0]

    # Ensure that warnings are always reissued from this function
    import warnings
    warnings.filterwarnings('always', module=r'pyamg\.krylov\._gmres_mgs')

    # Choose type
    if not hasattr(A, 'dtype'):
        Atype = upcast(x.dtype, b.dtype)
    else:
        Atype = A.dtype
    if not hasattr(M, 'dtype'):
        Mtype = upcast(x.dtype, b.dtype)
    else:
        Mtype = M.dtype
    xtype = upcast(Atype, x.dtype, b.dtype, Mtype)

    if restrt is not None:
        restrt = int(restrt)
    if maxiter is not None:
        maxiter = int(maxiter)

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [x])
    if np.iscomplexobj(np.zeros((1, ), dtype=xtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [x])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot',  'scal'], [x])

    # Make full use of direct access to BLAS by defining own norm
    def norm(z):
        return np.sqrt(np.real(dotc(z, z)))

    # Should norm(r) be kept
    if residuals == []:
        keep_r = True
    else:
        keep_r = False

    # Set number of outer and inner iterations
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > dimen:
            warn('Setting number of inner iterations (restrt) to the '
                 'maximum allowed, which is A.shape[0]')
            restrt = dimen
        max_inner = restrt
    else:
        max_outer = 1
        if maxiter is None:
            maxiter = min(dimen, 40)
        elif maxiter > dimen:
            warn('Setting number of inner iterations (maxiter) to the '
                 'maximum allowed, which is A.shape[0]')
            maxiter = dimen
        max_inner = maxiter

    # Is this a one dimensional matrix?
    if dimen == 1:
        entry = np.ravel(A * np.array([1.0], dtype=xtype))
        return (postprocess(b / entry), 0)

    # Prep for method
    r = b - np.ravel(A * x)

    # Apply preconditioner
    r = np.ravel(M * r)
    normr = norm(r)
    if keep_r:
        residuals.append(normr)
    # Check for nan, inf
    # if isnan(r).any() or isinf(r).any():
    #    warn('inf or nan after application of preconditioner')
    #    return(postprocess(x), -1)

    # Check initial guess ( scaling by b, if b != 0,
    #   must account for case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol * normb:
        return (postprocess(x), 0)

    # Scale tol by ||r_0||_2, we use the preconditioned residual
    # because this is left preconditioned GMRES.
    if normr != 0.0:
        tol = tol * normr

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_outer + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tol.
    niter = 0

    # Begin GMRES
    for outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(dimen*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.
        Q = []  # Givens Rotations
        # Upper Hessenberg matrix, which is then
        #   converted to upper tri with Givens Rots
        H = np.zeros((max_inner + 1, max_inner + 1), dtype=xtype)
        V = np.zeros((max_inner + 1, dimen), dtype=xtype)  # Krylov Space
        # vs store the pointers to each column of V.
        #   This saves a considerable amount of time.
        vs = []
        # v = r/normr
        V[0, :] = scal(1.0 / normr, r)
        vs.append(V[0, :])

        # This is the RHS vector for the problem in the Krylov Space
        g = np.zeros((dimen, ), dtype=xtype)
        g[0] = normr

        for inner in range(max_inner):

            # New Search Direction
            v = V[inner + 1, :]
            v[:] = np.ravel(M * (A * vs[-1]))
            vs.append(v)
            normv_old = norm(v)

            # Check for nan, inf
            # if isnan(V[inner+1, :]).any() or isinf(V[inner+1, :]).any():
            #    warn('inf or nan after application of preconditioner')
            #    return(postprocess(x), -1)

            #  Modified Gram Schmidt
            for k in range(inner + 1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                v[:] = axpy(vk, v, dimen, -alpha)

            normv = norm(v)
            H[inner, inner + 1] = normv

            # Re-orthogonalize
            if (reorth is True) and (normv_old == normv_old + 0.001 * normv):
                for k in range(inner + 1):
                    vk = vs[k]
                    alpha = dotc(vk, v)
                    H[inner, k] = H[inner, k] + alpha
                    v[:] = axpy(vk, v, dimen, -alpha)

            # Check for breakdown
            if H[inner, inner + 1] != 0.0:
                v[:] = scal(1.0 / H[inner, inner + 1], v)

            # Apply previous Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply next complex-valued Givens Rotation
            # ==> Note that if max_inner = dimen, then this is unnecessary
            # for the last inner
            #     iteration, when inner = dimen-1.
            if inner != dimen - 1:
                if H[inner, inner + 1] != 0:
                    [c, s, r] = lartg(H[inner, inner], H[inner, inner + 1])
                    Qblock = np.array([[c, s], [-np.conjugate(s), c]],
                                      dtype=xtype)
                    Q.append(Qblock)

                    # Apply Givens Rotation to g,
                    #   the RHS for the linear system in the Krylov Subspace.
                    g[inner:inner + 2] = np.dot(Qblock, g[inner:inner + 2])

                    # Apply effect of Givens Rotation to H
                    H[inner, inner] = dotu(Qblock[0, :], H[inner,
                                                           inner:inner + 2])
                    H[inner, inner + 1] = 0.0

            niter += 1

            # Don't update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner - 1:
                normr = np.abs(g[inner + 1])
                if normr < tol:
                    break

                # Allow user access to the iterates
                if callback is not None:
                    callback(x)
                if keep_r:
                    residuals.append(normr)

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space V.  Solve inner x inner system.
        y = sp.linalg.solve(H[0:inner + 1, 0:inner + 1].T, g[0:inner + 1])
        update = np.ravel(V[:inner + 1, :].T.dot(y.reshape(-1, 1)))
        x = x + update
        r = b - np.ravel(A * x)

        # Apply preconditioner
        r = np.ravel(M * r)
        normr = norm(r)
        # Check for nan, inf
        # if isnan(r).any() or isinf(r).any():
        #    warn('inf or nan after application of preconditioner')
        #    return(postprocess(x), -1)

        # Allow user access to the iterates
        if callback is not None:
            callback(x)
        if keep_r:
            residuals.append(normr)

        # Has GMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = np.max(np.abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
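
A small sketch (assuming only NumPy and SciPy) of the BLAS calling
conventions the GMRES kernels above rely on: ``axpy(x, y, n, a)`` returns
``a*x + y``, ``scal(a, x)`` rescales a vector, and ``dotc`` conjugates its
first argument.

import numpy as np
from scipy.linalg import get_blas_funcs

x = np.array([1.0 + 1j, 2.0 + 0j])
y = np.array([3.0 + 0j, 4.0 + 0j])
axpy, dotc, scal = get_blas_funcs(['axpy', 'dotc', 'scal'], [x])
print(dotc(x, y))                 # conj(x).y = (1-1j)*3 + 2*4 = (11-3j)
y = axpy(x, y, x.shape[0], 2.0)   # y := 2*x + y -> [5+2j, 8]
y = scal(0.5, y)                  # y := 0.5*y   -> [2.5+1j, 4]
print(y)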
Ejemplo n.º 46
0
def gmres_mgs(A,
              b,
              x0=None,
              tol=1e-5,
              restrt=None,
              maxiter=None,
              M=None,
              callback=None,
              residuals=None,
              reorth=False):
    """Generalized Minimum Residual Method (GMRES) based on MGS.

    GMRES iteratively refines the initial solution guess to the system
    Ax = b.  Modified Gram-Schmidt version.  Left preconditioning, leading
    to preconditioned residuals.

    Parameters
    ----------
    A : array, matrix, sparse matrix, LinearOperator
        n x n, linear system to solve
    b : array, matrix
        right hand side, shape is (n,) or (n,1)
    x0 : array, matrix
        initial guess, default is a vector of zeros
    tol : float
        Tolerance for stopping criteria, let r=r_k
           ||M r||     < tol ||M b||
        if ||b||=0, then set ||M b||=1 for these tests.
    restrt : None, int
        - if int, restrt is max number of inner iterations
          and maxiter is the max number of outer iterations
        - if None, do not restart GMRES, and max number of inner iterations
          is maxiter
    maxiter : None, int
        - if restrt is None, maxiter is the max number of inner iterations
          and GMRES does not restart
        - if restrt is int, maxiter is the max number of outer iterations,
          and restrt is the max number of inner iterations
        - defaults to min(n,40) if restrt=None
    M : array, matrix, sparse matrix, LinearOperator
        n x n, inverted preconditioner, i.e. solve M A x = M b.
    callback : function
        User-supplied function is called after each iteration as
        callback(xk), where xk is the current solution vector
    residuals : list
        preconditioned residual history in the 2-norm,
        including the initial preconditioned residual
    reorth : boolean
        If True, then a check is made whether to re-orthogonalize the Krylov
        space each GMRES iteration

    Returns
    -------
    (xk, info)
    xk : an updated guess after k iterations to the solution of Ax = b
    info : halting status

            ==  =======================================
            0   successful exit
            >0  convergence to tolerance not achieved,
                return iteration count instead.
            <0  numerical breakdown, or illegal input
            ==  =======================================

    Notes
    -----
    The LinearOperator class is in scipy.sparse.linalg.interface.
    Use this class if you prefer to define A or M as a mat-vec routine
    as opposed to explicitly constructing the matrix.

    For robustness, modified Gram-Schmidt is used to orthogonalize the
    Krylov space.  Givens rotations are used to provide the residual norm
    at each iteration.

    The residual is the *preconditioned* residual.

    Examples
    --------
    >>> from pyamg.krylov import gmres
    >>> from pyamg.util.linalg import norm
    >>> import numpy as np
    >>> from pyamg.gallery import poisson
    >>> A = poisson((10,10))
    >>> b = np.ones((A.shape[0],))
    >>> (x,flag) = gmres(A,b, maxiter=2, tol=1e-8, orthog='mgs')
    >>> print(f'{norm(b - A*x):.6}')
    6.54282

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html

    .. [2] C. T. Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html

    """
    # Convert inputs to linear system, with error checking
    A, M, x, b, postprocess = make_system(A, M, x0, b)
    n = A.shape[0]

    # Ensure that warnings are always reissued from this function
    warnings.filterwarnings('always', module='pyamg.krylov._gmres_mgs')

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation
    [lartg] = get_lapack_funcs(['lartg'], [x])
    if np.iscomplexobj(np.zeros((1, ), dtype=x.dtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [x])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [x])

    # Set number of outer and inner iterations
    # If no restarts,
    #     then set max_inner=maxiter and max_outer=n
    # If restarts are set,
    #     then set max_inner=restart and max_outer=maxiter
    if restrt:
        if maxiter:
            max_outer = maxiter
        else:
            max_outer = 1
        if restrt > n:
            warn('Setting restrt to maximum allowed, n.')
            restrt = n
        max_inner = restrt
    else:
        max_outer = 1
        if maxiter is None:
            maxiter = min(n, 40)
        elif maxiter > n:
            warn('Setting maxiter to maximum allowed, n.')
            maxiter = n
        max_inner = maxiter

    # Is this a one dimensional matrix?
    if n == 1:
        entry = np.ravel(A @ np.array([1.0], dtype=x.dtype))
        return (postprocess(b / entry), 0)

    # Prep for method
    r = b - A @ x

    # Apply preconditioner
    r = M @ r

    normr = norm(r)
    if residuals is not None:
        residuals[:] = [normr]  # initial residual

    # Check initial guess if b != 0,
    normb = norm(b)
    if normb == 0.0:
        normMb = 1.0  # reset so that tol is unscaled
    else:
        normMb = norm(M @ b)

    # set the stopping criteria (see the docstring)
    if normr < tol * normMb:
        return (postprocess(x), 0)

    # Use separate variable to track iterations.  If convergence fails, we
    # cannot simply report niter = (outer-1)*max_outer + inner.  Numerical
    # error could cause the inner loop to halt while the actual ||r|| > tolerance.
    niter = 0

    # Begin GMRES
    for _outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(n*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.
        Q = []  # Givens Rotations
        # Upper Hessenberg matrix, which is then
        #   converted to upper tri with Givens Rots
        H = np.zeros((max_inner + 1, max_inner + 1), dtype=x.dtype)
        V = np.zeros((max_inner + 1, n), dtype=x.dtype)  # Krylov Space
        # vs store the pointers to each column of V.
        #   This saves a considerable amount of time.
        vs = []
        # v = r/normr
        V[0, :] = scal(1.0 / normr, r)
        vs.append(V[0, :])

        # This is the RHS vector for the problem in the Krylov Space
        g = np.zeros((n, ), dtype=x.dtype)
        g[0] = normr

        for inner in range(max_inner):

            # New Search Direction
            v = V[inner + 1, :]
            v[:] = np.ravel(M @ (A @ vs[-1]))
            vs.append(v)
            normv_old = norm(v)

            #  Modified Gram Schmidt
            for k in range(inner + 1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                v[:] = axpy(vk, v, n, -alpha)

            normv = norm(v)
            H[inner, inner + 1] = normv

            # Re-orthogonalize
            if (reorth is True) and (normv_old == normv_old + 0.001 * normv):
                for k in range(inner + 1):
                    vk = vs[k]
                    alpha = dotc(vk, v)
                    H[inner, k] = H[inner, k] + alpha
                    v[:] = axpy(vk, v, n, -alpha)

            # Check for breakdown
            if H[inner, inner + 1] != 0.0:
                v[:] = scal(1.0 / H[inner, inner + 1], v)

            # Apply previous Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply next complex-valued Givens Rotation
            # ==> Note that if max_inner = n, then this is unnecessary
            #     for the last inner iteration, when inner = n-1.
            if inner != n - 1:
                if H[inner, inner + 1] != 0:
                    [c, s, r] = lartg(H[inner, inner], H[inner, inner + 1])
                    Qblock = np.array([[c, s], [-np.conjugate(s), c]],
                                      dtype=x.dtype)
                    Q.append(Qblock)

                    # Apply Givens Rotation to g,
                    #   the RHS for the linear system in the Krylov Subspace.
                    g[inner:inner + 2] = np.dot(Qblock, g[inner:inner + 2])

                    # Apply effect of Givens Rotation to H
                    H[inner, inner] = dotu(Qblock[0, :], H[inner,
                                                           inner:inner + 2])
                    H[inner, inner + 1] = 0.0

            niter += 1

            # Do not update normr if last inner iteration, because
            # normr is calculated directly after this loop ends.
            if inner < max_inner - 1:
                normr = np.abs(g[inner + 1])
                if normr < tol * normMb:
                    break

                if residuals is not None:
                    residuals.append(normr)

                if callback is not None:
                    y = sp.linalg.solve(H[0:inner + 1, 0:inner + 1].T,
                                        g[0:inner + 1])
                    update = np.ravel(V[:inner + 1, :].T.dot(y.reshape(-1, 1)))
                    callback(x + update)

        # end inner loop, back to outer loop

        # Find best update to x in Krylov Space V.  Solve inner x inner system.
        y = sp.linalg.solve(H[0:inner + 1, 0:inner + 1].T, g[0:inner + 1])
        update = np.ravel(V[:inner + 1, :].T.dot(y.reshape(-1, 1)))
        x = x + update
        r = b - A @ x

        # Apply preconditioner
        r = M @ r
        normr = norm(r)

        # Allow user access to the iterates
        if callback is not None:
            callback(x)

        if residuals is not None:
            residuals.append(normr)

        # Has GMRES stagnated?
        indices = (x != 0)
        if indices.any():
            change = np.max(np.abs(update[indices] / x[indices]))
            if change < 1e-12:
                # No change, halt
                return (postprocess(x), -1)

        # test for convergence
        if normr < tol * normMb:
            return (postprocess(x), 0)

    # end outer loop

    return (postprocess(x), niter)
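
A hypothetical usage sketch, assuming this routine is exposed as
``pyamg.krylov.gmres`` with ``orthog='mgs'`` as in the docstring example; it
shows how the ``residuals`` list captures the preconditioned residual
history.

import numpy as np
from pyamg.gallery import poisson
from pyamg.krylov import gmres

A = poisson((10, 10), format='csr')
b = np.ones(A.shape[0])
res = []  # filled in place with the preconditioned residual norms
x, info = gmres(A, b, tol=1e-8, residuals=res, orthog='mgs')
print(info, len(res), res[-1] / res[0])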
Ejemplo n.º 47
0
def _fgmres(matvec,
            v0,
            m,
            atol,
            lpsolve=None,
            rpsolve=None,
            cs=(),
            outer_v=(),
            prepend_outer_v=False):
    """
    FGMRES Arnoldi process, with optional projection or augmentation

    Parameters
    ----------
    matvec : callable
        Operation A*x
    v0 : ndarray
        Initial vector, normalized to nrm2(v0) == 1
    m : int
        Number of GMRES rounds
    atol : float
        Absolute tolerance for early exit
    lpsolve : callable
        Left preconditioner L
    rpsolve : callable
        Right preconditioner R
    cs : list of ndarray
        Columns of the matrix C in GCROT
    outer_v : list of ndarrays
        Augmentation vectors in LGMRES
    prepend_outer_v : bool, optional
        Whether augmentation vectors come before or after
        Krylov iterates

    Raises
    ------
    LinAlgError
        If nans encountered

    Returns
    -------
    Q, R : ndarray
        QR decomposition of the upper Hessenberg H=QR
    B : ndarray
        Projections corresponding to matrix C
    vs : list of ndarray
        Columns of matrix V
    zs : list of ndarray
        Columns of matrix Z
    y : ndarray
        Solution to ||H y - e_1||_2 = min!
    res : float
        The final (preconditioned) residual norm

    """

    if lpsolve is None:
        lpsolve = lambda x: x
    if rpsolve is None:
        rpsolve = lambda x: x

    axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'],
                                           (v0, ))

    vs = [v0]
    zs = []
    y = None
    res = np.nan

    m = m + len(outer_v)

    # Orthogonal projection coefficients
    B = np.zeros((len(cs), m), dtype=v0.dtype)

    # H is stored in QR factorized form
    Q = np.ones((1, 1), dtype=v0.dtype)
    R = np.zeros((1, 0), dtype=v0.dtype)

    eps = np.finfo(v0.dtype).eps

    breakdown = False

    # FGMRES Arnoldi process
    for j in range(m):
        # L A Z = C B + V H

        if prepend_outer_v and j < len(outer_v):
            z, w = outer_v[j]
        elif prepend_outer_v and j == len(outer_v):
            z = rpsolve(v0)
            w = None
        elif not prepend_outer_v and j >= m - len(outer_v):
            z, w = outer_v[j - (m - len(outer_v))]
        else:
            z = rpsolve(vs[-1])
            w = None

        if w is None:
            w = lpsolve(matvec(z))
        else:
            # w is clobbered below
            w = w.copy()

        w_norm = nrm2(w)

        # GCROT projection: L A -> (1 - C C^H) L A
        # i.e. orthogonalize against C
        for i, c in enumerate(cs):
            alpha = dot(c, w)
            B[i, j] = alpha
            w = axpy(c, w, c.shape[0], -alpha)  # w -= alpha*c

        # Orthogonalize against V
        hcur = np.zeros(j + 2, dtype=Q.dtype)
        for i, v in enumerate(vs):
            alpha = dot(v, w)
            hcur[i] = alpha
            w = axpy(v, w, v.shape[0], -alpha)  # w -= alpha*v
        hcur[i + 1] = nrm2(w)

        with np.errstate(over='ignore', divide='ignore'):
            # Careful with denormals
            alpha = 1 / hcur[-1]

        if np.isfinite(alpha):
            w = scal(alpha, w)

        if not (hcur[-1] > eps * w_norm):
            # w essentially in the span of previous vectors,
            # or we have nans. Bail out after updating the QR
            # solution.
            breakdown = True

        vs.append(w)
        zs.append(z)

        # Arnoldi LSQ problem

        # Add new column to H=Q@R, padding other columns with zeros
        Q2 = np.zeros((j + 2, j + 2), dtype=Q.dtype, order='F')
        Q2[:j + 1, :j + 1] = Q
        Q2[j + 1, j + 1] = 1

        R2 = np.zeros((j + 2, j), dtype=R.dtype, order='F')
        R2[:j + 1, :] = R

        Q, R = qr_insert(Q2,
                         R2,
                         hcur,
                         j,
                         which='col',
                         overwrite_qru=True,
                         check_finite=False)

        # Transformed least squares problem
        # || Q R y - inner_res_0 * e_1 ||_2 = min!
        # Since R = [R'; 0], solution is y = inner_res_0 (R')^{-1} (Q^H)[:j,0]

        # Residual is immediately known
        res = abs(Q[0, -1])

        # Check for termination
        if res < atol or breakdown:
            break

    if not np.isfinite(R[j, j]):
        # nans encountered, bail out
        raise LinAlgError()

    # -- Get the LSQ problem solution

    # The problem is triangular, but the condition number may be
    # bad (or in case of breakdown the last diagonal entry may be
    # zero), so use lstsq instead of trtrs.
    y, _, _, _ = lstsq(R[:j + 1, :j + 1], Q[0, :j + 1].conj())

    B = B[:, :j + 1]

    return Q, R, B, vs, zs, y, res
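
A minimal sketch (assuming only NumPy and SciPy) of the ``qr_insert`` update
``_fgmres`` performs above: a new column is appended to an existing QR
factorization instead of refactorizing the whole Hessenberg matrix.

import numpy as np
from scipy.linalg import qr, qr_insert

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 2))
Q, R = qr(A)                                 # full QR: Q is (4, 4), R is (4, 2)
u = rng.standard_normal(4)
Q2, R2 = qr_insert(Q, R, u, 2, which='col')  # append u as the third column
print(np.allclose(Q2 @ R2, np.column_stack([A, u])))  # True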
Ejemplo n.º 48
0
def gmres_mgs(surf_array, field_array, X, b, param, ind0, timing, kernel):
    """
    GMRES solver.

    Parameters
    ----------
    surf_array : array, contains the surface classes of each region on the
                        surface.
    field_array: array, contains the Field classes of each region on the surface.
    X          : array, initial guess.
    b          : array, right hand side.
    param      : class, parameters related to the surface.
    ind0       : class, it contains the indices related to the treecode
                        computation.
    timing     : class, it contains timing information for different parts of
                        the code.
    kernel     : pycuda source module.

    Returns
    --------
    X          : array, an updated guess to the solution.
    iteration  : int, number of outer iterations for convergence

    References
    ----------
    .. [1] Yousef Saad, "Iterative Methods for Sparse Linear Systems,
       Second Edition", SIAM, pp. 151-172, pp. 272-275, 2003
       http://www-users.cs.umn.edu/~saad/books.html
    .. [2] C. T. Kelley, http://www4.ncsu.edu/~ctk/matlab_roots.html
    """

    # Defining xtype as dtype of the problem, to decide which BLAS functions
    # import.
    xtype = upcast(X.dtype, b.dtype)

    # Get fast access to underlying BLAS routines
    # dotc is the conjugate dot, dotu does no conjugation

    [lartg] = get_lapack_funcs(['lartg'], [X])
    if numpy.iscomplexobj(numpy.zeros((1, ), dtype=xtype)):
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dotu', 'dotc', 'scal'], [X])
    else:
        # real type
        [axpy, dotu, dotc, scal] =\
            get_blas_funcs(['axpy', 'dot', 'dot', 'scal'], [X])

    # Make full use of direct access to BLAS by defining own norm
    def norm(z):
        return numpy.sqrt(numpy.real(dotc(z, z)))

    # Defining dimension
    dimen = len(X)

    max_iter = param.max_iter
    R = param.restart
    tol = param.tol

    # Set number of outer and inner iterations

    if R > dimen:
        warn('Setting number of inner iterations (restart) to the '
             'maximum allowed, which is the system size.')
        R = dimen

    max_inner = R

    # max_outer should be max_iter/max_inner, but this might not be an
    # integer, so we take the ceiling of the division.
    # In the inner loop there is an if statement to break in case max_iter
    # is reached.

    max_outer = int(numpy.ceil(max_iter / max_inner))

    # Prep for method
    aux = gmres_dot(X, surf_array, field_array, ind0, param, timing, kernel)
    r = b - aux

    normr = norm(r)

    # Check initial guess ( scaling by b, if b != 0, must account for
    # case when norm(b) is very small)
    normb = norm(b)
    if normb == 0.0:
        normb = 1.0
    if normr < tol * normb:
        return X, 0

    iteration = 0

    # Here start the GMRES
    for outer in range(max_outer):

        # Preallocate for Givens Rotations, Hessenberg matrix and Krylov Space
        # Space required is O(dimen*max_inner).
        # NOTE:  We are dealing with row-major matrices, so we traverse in a
        #        row-major fashion,
        #        i.e., H and V's transpose is what we store.

        Q = []  # Initializing Givens Rotations
        # Upper Hessenberg matrix, which is then
        # converted to upper triangular with Givens Rotations

        H = numpy.zeros((max_inner + 1, max_inner + 1), dtype=xtype)
        V = numpy.zeros((max_inner + 1, dimen), dtype=xtype)  # Krylov space

        # vs store the pointers to each column of V.
        # This saves a considerable amount of time.
        vs = []

        # v = r/normr
        V[0, :] = scal(1.0 / normr, r)  # scal wrapper of dscal --> x = a*x
        vs.append(V[0, :])

        # Saving initial residual to be used to calculate the rel_resid
        if iteration == 0:
            res_0 = normb

        # RHS vector in the Krylov space
        g = numpy.zeros((dimen, ), dtype=xtype)
        g[0] = normr

        for inner in range(max_inner):
            # New search direction
            v = V[inner + 1, :]
            v[:] = gmres_dot(vs[-1], surf_array, field_array, ind0, param,
                             timing, kernel)
            vs.append(v)

            # Modified Gram-Schmidt
            for k in range(inner + 1):
                vk = vs[k]
                alpha = dotc(vk, v)
                H[inner, k] = alpha
                v[:] = axpy(vk, v, dimen, -alpha)  # y := a*x + y
                #axpy is a wrapper for daxpy (blas function)

            normv = norm(v)
            H[inner, inner + 1] = normv

            # Check for breakdown
            if H[inner, inner + 1] != 0.0:
                v[:] = scal(1.0 / H[inner, inner + 1], v)

            # Apply previous Givens rotations to H
            if inner > 0:
                apply_givens(Q, H[inner, :], inner)

            # Calculate and apply the next complex-valued Givens rotation.
            # If max_inner = dimen, this is unnecessary for the last inner
            # iteration, when inner = dimen - 1.

            if inner != dimen - 1:
                if H[inner, inner + 1] != 0:
                    # lartg is a LAPACK function that computes the
                    # parameters for a Givens rotation
                    [c, s, _] = lartg(H[inner, inner], H[inner, inner + 1])
                    Qblock = numpy.array([[c, s], [-numpy.conjugate(s), c]],
                                         dtype=xtype)
                    Q.append(Qblock)

                    # Apply Givens rotations to the RHS for the linear
                    # system in the Krylov space.
                    g[inner:inner + 2] = numpy.dot(Qblock, g[inner:inner + 2])

                    # Apply Givens rotations to H
                    H[inner, inner] = dotu(Qblock[0, :], H[inner,
                                                           inner:inner + 2])
                    H[inner, inner + 1] = 0.0

            iteration += 1

            if inner < max_inner - 1:
                normr = abs(g[inner + 1])
                rel_resid = normr / res_0

                if rel_resid < tol:
                    break

            # Report progress every iteration
            print('Iteration: {}, relative residual: {}'.format(
                iteration, rel_resid))

            if (inner + 1 == R):
                print('Residual: {}. Restart...'.format(rel_resid))

            if iteration == max_iter:
                print('Warning: you have reached the maximum number of '
                      'iterations: {}.'.format(iteration))
                print('The run will stop. Check the residual behaviour; '
                      'you might have a bug. For future runs, consider '
                      'changing the tolerance or increasing max_iter.')
                break

        # end inner loop, back to outer loop

        # Find best update to X in Krylov Space V.  Solve inner x inner system.
        y = scipy.linalg.solve(H[0:inner + 1, 0:inner + 1].T, g[0:inner + 1])
        update = numpy.ravel(V[:inner + 1, :].T.dot(y.reshape(-1, 1)))
        X = X + update
        aux = gmres_dot(X, surf_array, field_array, ind0, param, timing,
                        kernel)
        r = b - aux

        normr = norm(r)
        rel_resid = normr / res_0

        # test for convergence
        if rel_resid < tol:
            print('GMRES solve')
            print('Converged after {} iterations to a residual of {}'.format(
                iteration, rel_resid))
            print('Time weight vector: {}'.format(timing.time_mass))
            print('Time sort         : {}'.format(timing.time_sort))
            print('Time data transfer: {}'.format(timing.time_trans))
            print('Time P2M          : {}'.format(timing.time_P2M))
            print('Time M2M          : {}'.format(timing.time_M2M))
            print('Time M2P          : {}'.format(timing.time_M2P))
            print('Time P2P          : {}'.format(timing.time_P2P))
            print('\tTime analy: {}'.format(timing.time_an))

            return X, iteration

    # end outer loop

    return X, iteration
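
A small NumPy-only sketch of the modified Gram-Schmidt loop shared by the
kernels above: each new vector is orthogonalized against the previous ones
one at a time, which is more robust in floating point than classical
Gram-Schmidt; the helper name below is illustrative.

import numpy as np

def mgs_rows(X):
    """Orthonormalize the rows of X by modified Gram-Schmidt."""
    V = np.array(X, dtype=float)
    for j in range(V.shape[0]):
        for k in range(j):
            # Subtract the projections one vector at a time
            V[j] -= np.dot(V[k], V[j]) * V[k]
        V[j] /= np.linalg.norm(V[j])
    return V

V = mgs_rows(np.random.rand(4, 6))
print(np.allclose(V @ V.T, np.eye(4)))  # True: rows are orthonormal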
Ejemplo n.º 49
0
def test_fast_dot():
    """Check fast dot blas wrapper function"""
    if fast_dot is np.dot:
        return

    rng = np.random.RandomState(42)
    A = rng.random_sample([2, 10])
    B = rng.random_sample([2, 10])

    try:
        linalg.get_blas_funcs(['gemm'])[0]
        has_blas = True
    except (AttributeError, ValueError):
        has_blas = False

    if has_blas:
        # Test _fast_dot for invalid input.

        # Maltyped data.
        for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]:
            assert_raises(ValueError, _fast_dot, A.astype(dt1),
                          B.astype(dt2).T)

        # Malformed data.

        ## ndim == 0
        E = np.empty(0)
        assert_raises(ValueError, _fast_dot, E, E)

        ## ndim == 1
        assert_raises(ValueError, _fast_dot, A, A[0])

        ## ndim > 2
        assert_raises(ValueError, _fast_dot, A.T, np.array([A, A]))

        ## min(shape) == 1
        assert_raises(ValueError, _fast_dot, A, A[0, :][None, :])

        # test for matrix mismatch error
        assert_raises(ValueError, _fast_dot, A, A)

    # Test cov-like use case + dtypes.
    for dtype in ['f8', 'f4']:
        A = A.astype(dtype)
        B = B.astype(dtype)

        #  col < row
        C = np.dot(A.T, A)
        C_ = fast_dot(A.T, A)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A.T, B)
        C_ = fast_dot(A.T, B)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A, B.T)
        C_ = fast_dot(A, B.T)
        assert_almost_equal(C, C_, decimal=5)

    # Test square matrix * rectangular use case.
    A = rng.random_sample([2, 2])
    for dtype in ['f8', 'f4']:
        A = A.astype(dtype)
        B = B.astype(dtype)

        C = np.dot(A, B)
        C_ = fast_dot(A, B)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A.T, B)
        C_ = fast_dot(A.T, B)
        assert_almost_equal(C, C_, decimal=5)

    if has_blas:
        for x in [np.array([[d] * 10] * 2) for d in [np.inf, np.nan]]:
            assert_raises(ValueError, _fast_dot, x, x.T)
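
A minimal sketch of calling ``gemm`` through the same ``get_blas_funcs``
lookup the test above probes for; the matrices are illustrative only.

import numpy as np
from scipy import linalg

A = np.asfortranarray(np.random.rand(3, 4))
B = np.asfortranarray(np.random.rand(4, 2))
gemm = linalg.get_blas_funcs(['gemm'], (A, B))[0]
C = gemm(1.0, A, B)           # C := 1.0 * A @ B via BLAS dgemm
print(np.allclose(C, A @ B))  # True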
Ejemplo n.º 50
0
def _update_dict(dictionary,
                 Y,
                 code,
                 verbose=False,
                 return_r2=False,
                 random_state=None):
    """Update the dense dictionary factor in place.

    Parameters
    ----------
    dictionary: array of shape (n_features, n_atoms)
        Value of the dictionary at the previous iteration.

    Y: array of shape (n_features, n_samples)
        Data matrix.

    code: array of shape (n_atoms, n_samples)
        Sparse coding of the data against which to optimize the dictionary.

    verbose:
        Degree of output the procedure will print.

    return_r2: bool
        Whether to compute and return the residual sum of squares corresponding
        to the computed solution.

    random_state: int or RandomState
        Pseudo number generator state used for random sampling.

    Returns
    -------
    dictionary: array of shape (n_features, n_atoms)
        Updated dictionary.

    """
    n_atoms = len(code)
    n_samples = Y.shape[0]
    random_state = check_random_state(random_state)
    # Residuals, computed 'in-place' for efficiency
    R = -np.dot(dictionary, code)
    R += Y
    R = np.asfortranarray(R)
    ger, = linalg.get_blas_funcs(('ger', ), (dictionary, code))
    for k in range(n_atoms):
        # R <- 1.0 * U_k * V_k^T + R
        R = ger(1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True)
        dictionary[:, k] = np.dot(R, code[k, :].T)
        # Scale k'th atom
        atom_norm_square = np.dot(dictionary[:, k], dictionary[:, k])
        if atom_norm_square < 1e-20:
            if verbose == 1:
                sys.stdout.write("+")
                sys.stdout.flush()
            elif verbose:
                print "Adding new random atom"
            dictionary[:, k] = random_state.randn(n_samples)
            # Setting corresponding coefs to 0
            code[k, :] = 0.0
            dictionary[:, k] /= sqrt(np.dot(dictionary[:, k],
                                            dictionary[:, k]))
        else:
            dictionary[:, k] /= sqrt(atom_norm_square)
            # R <- -1.0 * U_k * V_k^T + R
            R = ger(-1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True)
    if return_r2:
        R **= 2
        # R is fortran-ordered. For numpy version < 1.6, sum does not
        # follow the quick striding first, and is thus inefficient on
        # fortran ordered data. We take a flat view of the data with no
        # striding
        R = as_strided(R, shape=(R.size, ), strides=(R.dtype.itemsize, ))
        R = np.sum(R)
        return dictionary, R
    return dictionary
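
A minimal sketch (assuming only NumPy and SciPy) of the BLAS ``ger`` rank-1
update used in the atom loop above: ``R := alpha * x * y^T + R``, performed
in place on a Fortran-ordered array.

import numpy as np
from scipy import linalg

x = np.array([1.0, 2.0, 3.0])
y = np.array([4.0, 5.0])
R = np.asfortranarray(np.zeros((3, 2)))
ger, = linalg.get_blas_funcs(('ger',), (R,))
R = ger(1.0, x, y, a=R, overwrite_a=True)  # R += outer(x, y), in place
print(np.allclose(R, np.outer(x, y)))      # True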
Ejemplo n.º 51
0
def _gram_omp(Gram,
              Xy,
              n_nonzero_coefs,
              tol_0=None,
              tol=None,
              copy_Gram=True,
              copy_Xy=True):
    """Orthogonal Matching Pursuit step on a precomputed Gram matrix.

    This function uses the Cholesky decomposition method.

    Parameters:
    -----------
    Gram: array, shape = (n_features, n_features)
        Gram matrix of the input data matrix

    Xy: array, shape = (n_features,)
        Input targets

    n_nonzero_coefs: int
        Targeted number of non-zero elements

    tol_0: float
        Squared norm of y, required if tol is not None.

    tol: float
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_Gram: bool, optional
        Whether the gram matrix must be copied by the algorithm. A false
        value is only helpful if it is already Fortran-ordered, otherwise a
        copy is made anyway.

    copy_Xy: bool, optional
        Whether the covariance vector Xy must be copied by the algorithm.
        If False, it may be overwritten.

    Returns
    -------
    gamma: array, shape = (n_nonzero_coefs,)
        Non-zero elements of the solution

    idx: array, shape = (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector

    """
    Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)

    if copy_Xy:
        Xy = Xy.copy()

    min_float = np.finfo(Gram.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, ))
    potrs, = get_lapack_funcs(('potrs', ), (Gram, ))

    indices = list(range(len(Gram)))  # keeping track of swapping
    alpha = Xy
    tol_curr = tol_0
    delta = 0
    gamma = np.empty(0)
    n_active = 0

    max_features = len(Gram) if tol is not None else n_nonzero_coefs
    L = np.empty((max_features, max_features), dtype=Gram.dtype)
    L[0, 0] = 1.

    while True:
        lam = np.argmax(np.abs(alpha))
        if lam < n_active or alpha[lam]**2 < min_float:
            # selected same atom twice, or inner product too small
            warn(premature)
            break
        if n_active > 0:
            L[n_active, :n_active] = Gram[lam, :n_active]
            solve_triangular(L[:n_active, :n_active], L[n_active, :n_active])
            v = nrm2(L[n_active, :n_active])**2
            if 1 - v <= min_float:  # selected atoms are dependent
                warn(premature)
                break
            L[n_active, n_active] = np.sqrt(1 - v)
        Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
        Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
        n_active += 1
        # solves LL'x = y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active],
                         Xy[:n_active],
                         lower=True,
                         overwrite_b=False)

        beta = np.dot(Gram[:, :n_active], gamma)
        alpha = Xy - beta
        if tol is not None:
            tol_curr += delta
            delta = np.inner(gamma, beta[:n_active])
            tol_curr -= delta
            if tol_curr <= tol:
                break
        elif n_active == max_features:
            break

    return gamma, indices[:n_active]
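
For context, a usage sketch (assuming a recent scikit-learn) of the public wrapper `orthogonal_mp_gram`, which feeds this step the precomputed Gram = X'X and Xy = X'y; the data and sparsity level are illustrative:

import numpy as np
from sklearn.linear_model import orthogonal_mp_gram

rng = np.random.RandomState(0)
X = rng.randn(50, 20)
w_true = np.zeros(20)
w_true[[2, 7, 11]] = [1.5, -2.0, 0.7]      # 3-sparse ground truth
y = X @ w_true

Gram = X.T @ X                              # (n_features, n_features)
Xy = X.T @ y                                # (n_features,)
w = orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=3)
# in this noiseless setting OMP typically recovers the support exactly
print(np.flatnonzero(w))                    # expected: [ 2  7 11]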
Ejemplo n.º 52
0
def _gram_omp(Gram,
              Xy,
              n_nonzero_coefs,
              eps_0=None,
              eps=None,
              overwrite_gram=False,
              overwrite_Xy=False):
    """
    Solves a single Orthogonal Matching Pursuit problem using
    the Cholesky decomposition, based on the Gram matrix and more
    precomputations.

    Parameters
    ----------
    Gram: array, shape = (n_features, n_features)
        Gram matrix of the input data matrix

    Xy: array, shape = (n_features,)
        Input targets

    n_nonzero_coefs: int
        Targeted number of non-zero elements

    eps_0: float
        Squared norm of y, required if eps is not None.

    eps: float
        Targeted squared error, if not None overrides n_nonzero_coefs.

    overwrite_gram: bool
        Whether the Gram matrix can be overwritten by the algorithm. This
        is only helpful if it is already Fortran-ordered, otherwise a copy
        is made anyway.

    overwrite_Xy: bool
        Whether the covariance vector Xy can be overwritten by the algorithm.

    Returns
    -------
    gamma: array, shape = (n_nonzero_coefs,)
        Non-zero elements of the solution

    idx: array, shape = (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector

    """
    if not overwrite_gram:
        Gram = Gram.copy('F')
    else:
        Gram = np.asfortranarray(Gram)

    if not overwrite_Xy:
        Xy = Xy.copy()

    min_float = np.finfo(Gram.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, ))
    potrs, = get_lapack_funcs(('potrs', ), (Gram, ))

    idx = []
    alpha = Xy
    eps_curr = eps_0
    delta = 0
    n_active = 0

    max_features = len(Gram) if eps is not None else n_nonzero_coefs
    L = np.empty((max_features, max_features), dtype=Gram.dtype)
    L[0, 0] = 1.

    while True:
        lam = np.argmax(np.abs(alpha))
        if lam < n_active or alpha[lam]**2 < min_float:
            # selected same atom twice, or inner product too small
            warn(premature)
            break
        if n_active > 0:
            L[n_active, :n_active] = Gram[lam, :n_active]
            solve_triangular(L[:n_active, :n_active], L[n_active, :n_active])
            v = nrm2(L[n_active, :n_active])**2
            if 1 - v <= min_float:  # selected atoms are dependent
                warn(premature)
                break
            L[n_active, n_active] = np.sqrt(1 - v)
        idx.append(lam)
        Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
        Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
        Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
        n_active += 1
        # solves LL'x = y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active],
                         Xy[:n_active],
                         lower=True,
                         overwrite_b=False)

        beta = np.dot(Gram[:, :n_active], gamma)
        alpha = Xy - beta
        if eps is not None:
            eps_curr += delta
            delta = np.inner(gamma, beta[:n_active])
            eps_curr -= delta
            if eps_curr <= eps:
                break
        elif n_active == max_features:
            break

    return gamma, idx
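
Both variants extend the Cholesky factor L by one row per selected atom: a single triangular solve gives the off-diagonal row w, and the new diagonal entry is sqrt(G[k, k] - ||w||^2). A minimal sketch, with illustrative names, verifying this incremental update against a full factorization:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.RandomState(0)
A = rng.randn(30, 5)
A /= np.linalg.norm(A, axis=0)             # unit-norm atoms, so diag(G) == 1
G = A.T @ A

k = 3
L = cholesky(G[:k, :k], lower=True)        # factor of the leading k x k block
w = solve_triangular(L, G[k, :k], lower=True)   # one triangular solve
d = np.sqrt(G[k, k] - w @ w)               # new diagonal, sqrt(1 - ||w||^2) here

L_new = np.zeros((k + 1, k + 1))
L_new[:k, :k], L_new[k, :k], L_new[k, k] = L, w, d
assert np.allclose(L_new, cholesky(G[:k + 1, :k + 1], lower=True))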
Ejemplo n.º 53
0
def lgmres(A,
           b,
           x0=None,
           tol=1e-5,
           maxiter=1000,
           M=None,
           callback=None,
           inner_m=30,
           outer_k=3,
           outer_v=None,
           store_outer_Av=True,
           prepend_outer_v=False,
           atol=None):
    """
    Solve a matrix equation using the LGMRES algorithm.

    The LGMRES algorithm [1]_ [2]_ is designed to avoid some problems
    in the convergence in restarted GMRES, and often converges in fewer
    iterations.

    Parameters
    ----------
    A : {sparse matrix, ndarray, LinearOperator}
        The real or complex N-by-N matrix of the linear system.
        Alternatively, ``A`` can be a linear operator which can
        produce ``Ax`` using, e.g.,
        ``scipy.sparse.linalg.LinearOperator``.
    b : ndarray
        Right hand side of the linear system. Has shape (N,) or (N,1).
    x0 : ndarray
        Starting guess for the solution.
    tol, atol : float, optional
        Tolerances for convergence, ``norm(residual) <= max(tol*norm(b), atol)``.
        The default for ``atol`` is `tol`.

        .. warning::

           The default value for `atol` will be changed in a future release.
           For future compatibility, specify `atol` explicitly.
    maxiter : int, optional
        Maximum number of iterations.  Iteration will stop after maxiter
        steps even if the specified tolerance has not been achieved.
    M : {sparse matrix, ndarray, LinearOperator}, optional
        Preconditioner for A.  The preconditioner should approximate the
        inverse of A.  Effective preconditioning dramatically improves the
        rate of convergence, which implies that fewer iterations are needed
        to reach a given error tolerance.
    callback : function, optional
        User-supplied function to call after each iteration.  It is called
        as callback(xk), where xk is the current solution vector.
    inner_m : int, optional
        Number of inner GMRES iterations per outer iteration.
    outer_k : int, optional
        Number of vectors to carry between inner GMRES iterations.
        According to [1]_, good values are in the range of 1...3.
        However, note that if you want to use the additional vectors to
        accelerate solving multiple similar problems, larger values may
        be beneficial.
    outer_v : list of tuples, optional
        List containing tuples ``(v, Av)`` of vectors and corresponding
        matrix-vector products, used to augment the Krylov subspace, and
        carried between inner GMRES iterations. The element ``Av`` can
        be `None` if the matrix-vector product should be re-evaluated.
        This parameter is modified in-place by `lgmres`, and can be used
        to pass "guess" vectors in and out of the algorithm when solving
        similar problems.
    store_outer_Av : bool, optional
        Whether LGMRES should also store A@v in addition to the vectors `v`
        in the `outer_v` list. Default is True.
    prepend_outer_v : bool, optional
        Whether to put outer_v augmentation vectors before Krylov iterates.
        In standard LGMRES, prepend_outer_v=False.

    Returns
    -------
    x : ndarray
        The converged solution.
    info : int
        Provides convergence information:

            - 0  : successful exit
            - >0 : convergence to tolerance not achieved, number of iterations
            - <0 : illegal input or breakdown

    Notes
    -----
    The LGMRES algorithm [1]_ [2]_ is designed to avoid the
    slowing of convergence in restarted GMRES, due to alternating
    residual vectors. It typically outperforms GMRES(m) of
    comparable memory requirements, or at least is not much worse.

    Another advantage of this algorithm is that you can supply it with
    'guess' vectors in the `outer_v` argument that augment the Krylov
    subspace. If the solution lies close to the span of these vectors,
    the algorithm converges faster. This can be useful if several very
    similar matrices need to be inverted one after another, such as in
    Newton-Krylov iteration where the Jacobian matrix often changes
    little in the nonlinear steps.

    References
    ----------
    .. [1] A.H. Baker and E.R. Jessup and T. Manteuffel, "A Technique for
             Accelerating the Convergence of Restarted GMRES", SIAM J. Matrix
             Anal. Appl. 26, 962 (2005).
    .. [2] A.H. Baker, "On Improving the Performance of the Linear Solver
             restarted GMRES", PhD thesis, University of Colorado (2003).

    Examples
    --------
    >>> from scipy.sparse import csc_matrix
    >>> from scipy.sparse.linalg import lgmres
    >>> A = csc_matrix([[3, 2, 0], [1, -1, 0], [0, 5, 1]], dtype=float)
    >>> b = np.array([2, 4, -1], dtype=float)
    >>> x, exitCode = lgmres(A, b)
    >>> print(exitCode)            # 0 indicates successful convergence
    0
    >>> np.allclose(A.dot(x), b)
    True
    """
    A, M, x, b, postprocess = make_system(A, M, x0, b)

    if not np.isfinite(b).all():
        raise ValueError("RHS must contain only finite numbers")

    if atol is None:
        warnings.warn(
            "scipy.sparse.linalg.lgmres called without specifying `atol`. "
            "The default value will change in the future. To preserve "
            "current behavior, set ``atol=tol``.",
            category=DeprecationWarning,
            stacklevel=2)
        atol = tol

    matvec = A.matvec
    psolve = M.matvec

    if outer_v is None:
        outer_v = []

    axpy, dot, scal = None, None, None
    nrm2 = get_blas_funcs('nrm2', [b])

    b_norm = nrm2(b)
    if b_norm == 0:
        x = b
        return (postprocess(x), 0)

    ptol_max_factor = 1.0

    for k_outer in range(maxiter):
        r_outer = matvec(x) - b

        # -- callback
        if callback is not None:
            callback(x)

        # -- determine input type routines
        if axpy is None:
            if np.iscomplexobj(r_outer) and not np.iscomplexobj(x):
                x = x.astype(r_outer.dtype)
            axpy, dot, scal, nrm2 = get_blas_funcs(
                ['axpy', 'dot', 'scal', 'nrm2'], (x, r_outer))

        # -- check stopping condition
        r_norm = nrm2(r_outer)
        if r_norm <= max(atol, tol * b_norm):
            break

        # -- inner LGMRES iteration
        v0 = -psolve(r_outer)
        inner_res_0 = nrm2(v0)

        if inner_res_0 == 0:
            rnorm = nrm2(r_outer)
            raise RuntimeError("Preconditioner returned a zero vector; "
                               "|v| ~ %.1g, |M v| = 0" % rnorm)

        v0 = scal(1.0 / inner_res_0, v0)

        ptol = min(ptol_max_factor, max(atol, tol * b_norm) / r_norm)

        try:
            Q, R, B, vs, zs, y, pres = _fgmres(matvec,
                                               v0,
                                               inner_m,
                                               lpsolve=psolve,
                                               atol=ptol,
                                               outer_v=outer_v,
                                               prepend_outer_v=prepend_outer_v)
            y *= inner_res_0
            if not np.isfinite(y).all():
                # Overflow etc. in computation. There's no way to
                # recover from this, so we have to bail out.
                raise LinAlgError()
        except LinAlgError:
            # Floating point over/underflow, non-finite result from
            # matmul etc. -- report failure.
            return postprocess(x), k_outer + 1

        # Inner loop tolerance control
        if pres > ptol:
            ptol_max_factor = min(1.0, 1.5 * ptol_max_factor)
        else:
            ptol_max_factor = max(1e-16, 0.25 * ptol_max_factor)

        # -- GMRES terminated: eval solution
        dx = zs[0] * y[0]
        for w, yc in zip(zs[1:], y[1:]):
            dx = axpy(w, dx, dx.shape[0], yc)  # dx += w*yc

        # -- Store LGMRES augmentation vectors
        nx = nrm2(dx)
        if nx > 0:
            if store_outer_Av:
                q = Q.dot(R.dot(y))
                ax = vs[0] * q[0]
                for v, qc in zip(vs[1:], q[1:]):
                    ax = axpy(v, ax, ax.shape[0], qc)
                outer_v.append((dx / nx, ax / nx))
            else:
                outer_v.append((dx / nx, None))

        # -- Retain only a finite number of augmentation vectors
        while len(outer_v) > outer_k:
            del outer_v[0]

        # -- Apply step
        x += dx
    else:
        # didn't converge ...
        return postprocess(x), maxiter

    return postprocess(x), 0
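
A small usage sketch of the `outer_v` recycling the docstring describes: two nearby systems are solved, and the second call reuses the augmentation vectors accumulated by the first. The matrix and the perturbation are illustrative:

import numpy as np
from scipy.sparse import identity, rand
from scipy.sparse.linalg import lgmres

rng = np.random.RandomState(0)
n = 200
# a well-conditioned, nearly-identity test matrix
A = identity(n) + 0.1 * rand(n, n, density=0.01, random_state=rng)
b = rng.randn(n)

outer_v = []                               # shared; filled in place by lgmres
x1, info1 = lgmres(A, b, atol=1e-8, outer_v=outer_v)
# a slightly perturbed system, as in a Newton-Krylov step
x2, info2 = lgmres(A + 1e-3 * identity(n), b, atol=1e-8, outer_v=outer_v)
print(info1, info2)                        # 0, 0 indicates convergence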
Ejemplo n.º 54
0
def lars_path(X, y, Xy=None, Gram=None, max_features=None, max_iter=500,
              alpha_min=0, method='lar', overwrite_X=False,
              overwrite_Gram=False, verbose=False):
    """Compute Least Angle Regression and LASSO path

    Parameters
    -----------
    X: array, shape: (n_samples, n_features)
        Input data

    y: array, shape: (n_samples)
        Input targets

    max_features: integer, optional
        Maximum number of selected features.

    max_iter: integer, optional
        Maximum number of iterations to perform.

    Gram: None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if 'auto', the Gram
        matrix is precomputed from the given X, if there are more samples
        than features

    alpha_min: float, optional
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method: 'lar' | 'lasso'
        Specifies the returned model. Select 'lar' for Least Angle
        Regression, 'lasso' for the Lasso.

    Returns
    --------
    alphas: array, shape: (max_features + 1,)
        Maximum of covariances (in absolute value) at each
        iteration.

    active: array, shape (max_features,)
        Indices of active variables at the end of the path.

    coefs: array, shape (n_features, max_features+1)
        Coefficients along the path

    See also
    --------
    :ref:`LassoLars`, :ref:`Lars`

    Notes
    ------
    * http://en.wikipedia.org/wiki/Least-angle_regression

    * http://en.wikipedia.org/wiki/Lasso_(statistics)#LASSO_method
    """

    n_features = X.shape[1]
    n_samples = y.size

    if max_features is None:
        max_features = min(n_samples, n_features)

    coefs = np.zeros((max_features + 1, n_features))
    alphas = np.zeros(max_features + 1)
    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False
    eps = np.finfo(X.dtype).eps

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X,))
    potrs, = get_lapack_funcs(('potrs',), (X,))

    if Gram is None:
        if not overwrite_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif Gram == 'auto':
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    else:
        if not overwrite_Gram:
            Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        print "Step\t\tAdded\t\tDropped\t\tActive set size\t\tC"

    while True:

        if Cov.size:
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
            # keep the signed value C_; it is used below to compute gamma_
        else:
            C = 0.

        alphas[n_iter] = C / n_samples

        # Check for early stopping
        if alphas[n_iter] < alpha_min:  # interpolate
            # interpolation factor 0 <= ss < 1
            if n_iter > 0:
                # In the first iteration, all alphas are zero, the formula
                # below would make ss a NaN
                ss = (alphas[n_iter - 1] - alpha_min) / (alphas[n_iter - 1] -
                                                    alphas[n_iter])
                coefs[n_iter] = coefs[n_iter - 1] + ss * (coefs[n_iter] -
                                coefs[n_iter - 1])
            alphas[n_iter] = alpha_min
            break

        if n_active == max_features or n_iter == max_iter:
            break

        if not drop:

            # Update the Cholesky factorization of (Xa * Xa')     #
            #                                                     #
            #            ( L   0 )                                #
            #     L  ->  (       )  , where L * w = Xa' x_j       #
            #            ( w   z )    and z = sqrt(||x_j||^2      #
            #                                      - ||w||^2)     #
            #                                                     #
            #   with x_j the atom last added to the active set    #

            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active]) ** 2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            arrayfuncs.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active])
            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active,  n_active] = diag

            active.append(indices[n_active])
            n_active += 1

            if verbose:
                print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '',
                                                            n_active, C)

        # least squares solution
        least_squares, info = potrs(L[:n_active, :n_active],
                               sign_active[:n_active], lower=True)

        # is this really needed ?
        AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))
        least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each inactive variable and
            # the equiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                 least_squares)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir))
        gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = - coefs[n_iter, active] / least_squares
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:

            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if n_iter >= coefs.shape[0]:
            # resize the coefs and alphas array
            add_features = 2 * max(1, (max_features - n_active))
            coefs.resize((n_iter + add_features, n_features))
            alphas.resize(n_iter + add_features)

        coefs[n_iter, active] = coefs[n_iter - 1, active] + \
                                gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        if n_active > n_features:
            break

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx)

            n_active -= 1
            m, n = idx, n_active
            drop_idx = active.pop(idx)

            if Gram is None:
                # propagate dropped variable
                for i in range(idx, n_active):
                    X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                    indices[i], indices[i + 1] =  \
                            indices[i + 1], indices[i]  # yeah this is stupid

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active],
                                      coefs[n_iter, active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for i in range(idx, n_active):
                    indices[i], indices[i + 1] =  \
                                indices[i + 1], indices[i]
                    Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                    Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
                                                      Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coefs[n_iter])
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose:
                print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx,
                                                      n_active, abs(temp))

    # resize coefs in case of early stop
    alphas = alphas[:n_iter + 1]
    coefs = coefs[:n_iter + 1]

    return alphas, active, coefs.T
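
The `potrs` routine obtained above deserves a standalone note: given the lower Cholesky factor L of a symmetric positive definite matrix K, it solves K x = b by two triangular substitutions. A minimal sketch with an illustrative matrix:

import numpy as np
from scipy.linalg import get_lapack_funcs

rng = np.random.RandomState(0)
G = rng.randn(6, 6)
K = G @ G.T + 6 * np.eye(6)                # symmetric positive definite
b = rng.randn(6)

L = np.linalg.cholesky(K)                  # lower-triangular factor, K = L L'
potrs, = get_lapack_funcs(('potrs',), (K,))
x, info = potrs(L, b, lower=True)
assert info == 0 and np.allclose(K @ x, b)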
Ejemplo n.º 55
0
def norm(v):
    v = np.asarray(v)
    __nrm2, = linalg.get_blas_funcs(['nrm2'], [v])
    return __nrm2(v)
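
A quick, illustrative check that this wrapper dispatches to the BLAS routine matching the input dtype (snrm2/dnrm2 for real, scnrm2/dznrm2 for complex) and agrees with np.linalg.norm:

import numpy as np
from scipy import linalg

for v in (np.arange(5, dtype=np.float32),  # dispatches to snrm2
          np.array([3 + 4j, 0.0])):        # dispatches to dznrm2
    nrm2, = linalg.get_blas_funcs(['nrm2'], [v])
    assert np.isclose(nrm2(v), np.linalg.norm(v))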
Ejemplo n.º 56
0
from scipy.linalg import get_blas_funcs
import time
import numpy as np
st = time.time()
X = np.random.randn(10, 4)
Y = np.random.randn(7, 4).T
gemm = get_blas_funcs("gemm", [X, Y])  # a single routine matching the dtypes
print("Time comparison of blas", gemm(1, X, Y))
print(time.time() - st)

st = time.time()

print("Time comparison of np.dot", np.dot(X, Y))
print(time.time() - st)

st = time.time()

print("Time comparison of np.matmul", np.matmul(X, Y))
print(time.time() - st)
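
The timings above include the cost of printing the result, and a single call is too noisy to compare. A fairer sketch, illustrative only, that warms up once and averages many repeats with time.perf_counter:

import time
import numpy as np
from scipy.linalg import get_blas_funcs

X = np.random.randn(10, 4)
Y = np.random.randn(7, 4).T
gemm = get_blas_funcs("gemm", [X, Y])

def bench(fn, repeats=10000):
    fn()                                   # warm-up
    t0 = time.perf_counter()
    for _ in range(repeats):
        fn()
    return (time.perf_counter() - t0) / repeats

print("gemm     :", bench(lambda: gemm(1.0, X, Y)))
print("np.dot   :", bench(lambda: np.dot(X, Y)))
print("np.matmul:", bench(lambda: np.matmul(X, Y)))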
Ejemplo n.º 57
0
def _gram_omp(Gram,
              Xy,
              n_nonzero_coefs,
              tol_0=None,
              tol=None,
              copy_Gram=True,
              copy_Xy=True,
              return_path=False):
    """Orthogonal Matching Pursuit step on a precomputed Gram matrix.

    This function uses the Cholesky decomposition method.

    Parameters
    ----------
    Gram : array, shape (n_features, n_features)
        Gram matrix of the input data matrix

    Xy : array, shape (n_features,)
        Input targets

    n_nonzero_coefs : int
        Targeted number of non-zero elements

    tol_0 : float
        Squared norm of y, required if tol is not None.

    tol : float
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_Gram : bool, optional
        Whether the gram matrix must be copied by the algorithm. A false
        value is only helpful if it is already Fortran-ordered, otherwise a
        copy is made anyway.

    copy_Xy : bool, optional
        Whether the covariance vector Xy must be copied by the algorithm.
        If False, it may be overwritten.

    return_path : bool, optional. Default: False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    Returns
    -------
    gamma : array, shape (n_nonzero_coefs,)
        Non-zero elements of the solution

    idx : array, shape (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector

    coefs : array, shape (n_features, n_nonzero_coefs)
        The first k values of column k correspond to the coefficient value
        for the active features at that step. The lower left triangle contains
        garbage. Only returned if ``return_path=True``.

    n_active : int
        Number of active features at convergence.
    """
    Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)

    if copy_Xy or not Xy.flags.writeable:
        Xy = Xy.copy()

    min_float = np.finfo(Gram.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, ))
    potrs, = get_lapack_funcs(('potrs', ), (Gram, ))

    indices = np.arange(len(Gram))  # keeping track of swapping
    alpha = Xy
    tol_curr = tol_0
    delta = 0
    gamma = np.empty(0)
    n_active = 0

    max_features = len(Gram) if tol is not None else n_nonzero_coefs

    L = np.empty((max_features, max_features), dtype=Gram.dtype)

    L[0, 0] = 1.
    if return_path:
        coefs = np.empty_like(L)

    while True:
        lam = np.argmax(np.abs(alpha))
        if lam < n_active or alpha[lam]**2 < min_float:
            # selected same atom twice, or inner product too small
            warnings.warn(premature, RuntimeWarning, stacklevel=3)
            break
        if n_active > 0:
            L[n_active, :n_active] = Gram[lam, :n_active]
            linalg.solve_triangular(L[:n_active, :n_active],
                                    L[n_active, :n_active],
                                    trans=0,
                                    lower=1,
                                    overwrite_b=True,
                                    check_finite=False)
            v = nrm2(L[n_active, :n_active])**2
            Lkk = Gram[lam, lam] - v
            if Lkk <= min_float:  # selected atoms are dependent
                warnings.warn(premature, RuntimeWarning, stacklevel=3)
                break
            L[n_active, n_active] = sqrt(Lkk)
        else:
            L[0, 0] = sqrt(Gram[lam, lam])

        Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
        Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
        n_active += 1
        # solves LL'x = X'y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active],
                         Xy[:n_active],
                         lower=True,
                         overwrite_b=False)
        if return_path:
            coefs[:n_active, n_active - 1] = gamma
        beta = np.dot(Gram[:, :n_active], gamma)
        alpha = Xy - beta
        if tol is not None:
            tol_curr += delta
            delta = np.inner(gamma, beta[:n_active])
            tol_curr -= delta
            if abs(tol_curr) <= tol:
                break
        elif n_active == max_features:
            break

    if return_path:
        return gamma, indices[:n_active], coefs[:, :n_active], n_active
    else:
        return gamma, indices[:n_active], n_active
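
A minimal sketch, separate from the function above, of what the `coefs` buffer records when ``return_path=True``: column k holds the least-squares coefficients of the first k+1 selected atoms, which is the forward path cross-validation inspects. The data and selection order are illustrative:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(40, 8)
y = X @ np.array([0, 2.0, 0, 0, -1.0, 0, 0.5, 0])

order = [1, 4, 6]                          # pretend OMP selected these atoms
coefs = np.zeros((len(order), len(order)))
for k in range(1, len(order) + 1):
    gamma, *_ = np.linalg.lstsq(X[:, order[:k]], y, rcond=None)
    coefs[:k, k - 1] = gamma               # first k entries of column k-1

print(coefs)                               # upper triangle holds the path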
Ejemplo n.º 58
0
def lars_path(X,
              y,
              Xy=None,
              Gram=None,
              max_iter=500,
              alpha_min=0,
              method='lar',
              copy_X=True,
              eps=np.finfo(np.float64).eps,
              copy_Gram=True,
              verbose=0,
              return_path=True,
              return_n_iter=False):
    """Compute Least Angle Regression or Lasso path using LARS algorithm [1]

    The optimization objective for the case method='lasso' is::

        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    in the case of method='lars', the objective function is only known in
    the form of an implicit equation (see discussion in [1])

    Parameters
    -----------
    X : array, shape: (n_samples, n_features)
        Input data.

    y : array, shape: (n_samples)
        Input targets.

    max_iter : integer, optional (default=500)
        Maximum number of iterations to perform, set to infinity for no limit.

    Gram : None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram
        matrix is precomputed from the given X, if there are more samples
        than features.

    alpha_min : float, optional (default=0)
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method : {'lar', 'lasso'}, optional (default='lar')
        Specifies the returned model. Select ``'lar'`` for Least Angle
        Regression, ``'lasso'`` for the Lasso.

    eps : float, optional (default=``np.finfo(np.float64).eps``)
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    copy_X : bool, optional (default=True)
        If ``False``, ``X`` is overwritten.

    copy_Gram : bool, optional (default=True)
        If ``False``, ``Gram`` is overwritten.

    verbose : int (default=0)
        Controls output verbosity.

    return_path : bool, optional (default=True)
        If True, returns the entire path; otherwise returns only the
        last point of the path.

    Returns
    --------
    alphas: array, shape: [n_alphas + 1]
        Maximum of covariances (in absolute value) at each iteration.
        ``n_alphas`` is either ``max_iter``, ``n_features`` or the
        number of nodes in the path with ``alpha >= alpha_min``, whichever
        is smaller.

    active: array, shape [n_alphas]
        Indices of active variables at the end of the path.

    coefs: array, shape (n_features, n_alphas + 1)
        Coefficients along the path

    n_iter : int
        Number of iterations run. Returned only if return_n_iter is set
        to True.

    See also
    --------
    lasso_path
    LassoLars
    Lars
    LassoLarsCV
    LarsCV
    sklearn.decomposition.sparse_encode

    References
    ----------
    .. [1] "Least Angle Regression", Efron et al.
           http://www-stat.stanford.edu/~tibs/ftp/lars.pdf

    .. [2] `Wikipedia entry on the Least-angle regression
           <http://en.wikipedia.org/wiki/Least-angle_regression>`_

    .. [3] `Wikipedia entry on the Lasso
           <http://en.wikipedia.org/wiki/Lasso_(statistics)#Lasso_method>`_

    """

    n_features = X.shape[1]
    n_samples = y.size
    max_features = min(max_iter, n_features)

    if return_path:
        coefs = np.zeros((max_features + 1, n_features))
        alphas = np.zeros(max_features + 1)
    else:
        coef, prev_coef = np.zeros(n_features), np.zeros(n_features)
        alpha, prev_alpha = np.array([0.]), np.array([0.])  # better ideas?

    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    # We are initializing this to "zeros" and not empty, because
    # it is passed to scipy linalg functions and thus if it has NaNs,
    # even if they are in the upper part that it not used, we
    # get errors raised.
    # Once we support only scipy > 0.12 we can use check_finite=False and
    # go back to "empty"
    L = np.zeros((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X, ))
    solve_cholesky, = get_lapack_funcs(('potrs', ), (X, ))

    if Gram is None:
        if copy_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif Gram == 'auto':
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    elif copy_Gram:
        Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        if verbose > 1:
            print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC")
        else:
            sys.stdout.write('.')
            sys.stdout.flush()

    tiny = np.finfo(np.float64).tiny  # to avoid division by 0 warning
    tiny32 = np.finfo(np.float32).tiny  # to avoid division by 0 warning
    equality_tolerance = np.finfo(np.float32).eps

    while True:
        if Cov.size:
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
        else:
            C = 0.

        if return_path:
            alpha = alphas[n_iter, np.newaxis]
            coef = coefs[n_iter]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
            prev_coef = coefs[n_iter - 1]

        alpha[0] = C / n_samples
        if alpha[0] <= alpha_min + equality_tolerance:  # early stopping
            if abs(alpha[0] - alpha_min) > equality_tolerance:
                # interpolation factor 0 <= ss < 1
                if n_iter > 0:
                    # In the first iteration, all alphas are zero, the formula
                    # below would make ss a NaN
                    ss = ((prev_alpha[0] - alpha_min) /
                          (prev_alpha[0] - alpha[0]))
                    coef[:] = prev_coef + ss * (coef - prev_coef)
                alpha[0] = alpha_min
            if return_path:
                coefs[n_iter] = coef
            break

        if n_iter >= max_iter or n_active >= n_features:
            break

        if not drop:

            ##########################################################
            # Append x_j to the Cholesky factorization of (Xa * Xa') #
            #                                                        #
            #            ( L   0 )                                   #
            #     L  ->  (       )  , where L * w = Xa' x_j          #
            #            ( w   z )    and z = ||x_j||                #
            #                                                        #
            ##########################################################

            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov_not_shortened = Cov
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active])**2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            if n_active:
                linalg.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active],
                                        trans=0,
                                        lower=1,
                                        overwrite_b=True,
                                        **solve_triangular_args)

            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            if diag < 1e-7:
                # The system is becoming too ill-conditioned.
                # We have degenerate vectors in our active set.
                # We'll 'drop for good' the last regressor added.

                # Note: this case is very rare. It is no longer triggered by the
                # test suite. The `equality_tolerance` margin added in 0.16.0 to
                # get early stopping to work consistently on all versions of
                # Python including 32 bit Python under Windows seems to make it
                # very difficult to trigger the 'drop for good' strategy.
                warnings.warn(
                    'Regressors in active set degenerate. '
                    'Dropping a regressor, after %i iterations, '
                    'i.e. alpha=%.3e, '
                    'with an active set of %i regressors, and '
                    'the smallest cholesky pivot element being %.3e' %
                    (n_iter, alpha, n_active, diag), ConvergenceWarning)
                # XXX: need to figure a 'drop for good' way
                Cov = Cov_not_shortened
                Cov[0] = 0
                Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
                continue

            active.append(indices[n_active])
            n_active += 1

            if verbose > 1:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" %
                      (n_iter, active[-1], '', n_active, C))

        if method == 'lasso' and n_iter > 0 and prev_alpha[0] < alpha[0]:
            # alpha is increasing. This is because the updates of Cov are
            # bringing in too much numerical error that is greater than
            # the remaining correlation with the regressors. Time to bail out
            warnings.warn(
                'Early stopping the lars path, as the residues '
                'are small and the current value of alpha is no '
                'longer well controlled. %i iterations, alpha=%.3e, '
                'previous alpha=%.3e, with an active set of %i '
                'regressors.' % (n_iter, alpha, prev_alpha, n_active),
                ConvergenceWarning)
            break

        # least squares solution
        least_squares, info = solve_cholesky(L[:n_active, :n_active],
                                             sign_active[:n_active],
                                             lower=True)

        if least_squares.size == 1 and least_squares == 0:
            # This happens because sign_active[:n_active] = 0
            least_squares[...] = 1
            AA = 1.
        else:
            # is this really needed ?
            AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))

            if not np.isfinite(AA):
                # L is too ill-conditioned
                i = 0
                L_ = L[:n_active, :n_active].copy()
                while not np.isfinite(AA):
                    L_.flat[::n_active + 1] += (2**i) * eps
                    least_squares, info = solve_cholesky(
                        L_, sign_active[:n_active], lower=True)
                    tmp = max(np.sum(least_squares * sign_active[:n_active]),
                              eps)
                    AA = 1. / np.sqrt(tmp)
                    i += 1
            least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each inactive variable and
            # the equiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T, least_squares)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
        gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = -coef[active] / (least_squares + tiny32)
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0][::-1]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if return_path:
            if n_iter >= coefs.shape[0]:
                del coef, alpha, prev_alpha, prev_coef
                # resize the coefs and alphas array
                add_features = 2 * max(1, (max_features - n_active))
                coefs = np.resize(coefs, (n_iter + add_features, n_features))
                alphas = np.resize(alphas, n_iter + add_features)
            coef = coefs[n_iter]
            prev_coef = coefs[n_iter - 1]
            alpha = alphas[n_iter, np.newaxis]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
        else:
            # mimic the effect of incrementing n_iter on the array references
            prev_coef = coef
            prev_alpha[0] = alpha[0]
            coef = np.zeros_like(coef)

        coef[active] = prev_coef[active] + gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            # handle the case when idx is not length of 1
            [
                arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii)
                for ii in idx
            ]

            n_active -= 1
            m, n = idx, n_active
            # handle the case when idx is not length of 1
            drop_idx = [active.pop(ii) for ii in idx]

            if Gram is None:
                # propagate dropped variable
                for ii in idx:
                    for i in range(ii, n_active):
                        X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                        # yeah this is stupid
                        indices[i], indices[i + 1] = indices[i + 1], indices[i]

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active], coef[active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for ii in idx:
                    for i in range(ii, n_active):
                        indices[i], indices[i + 1] = indices[i + 1], indices[i]
                        Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                        Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
                                                          Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coef)
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose > 1:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" %
                      (n_iter, '', drop_idx, n_active, abs(temp)))

    if return_path:
        # resize coefs in case of early stop
        alphas = alphas[:n_iter + 1]
        coefs = coefs[:n_iter + 1]

        if return_n_iter:
            return alphas, active, coefs.T, n_iter
        else:
            return alphas, active, coefs.T
    else:
        if return_n_iter:
            return alpha, active, coef, n_iter
        else:
            return alpha, active, coef
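
A usage sketch, assuming scikit-learn is installed, of the public entry point this function backs; the synthetic regression problem is illustrative:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import lars_path

X, y = make_regression(n_samples=100, n_features=10, n_informative=3,
                       noise=1.0, random_state=0)
alphas, active, coefs = lars_path(X, y, method='lasso')

print(alphas.shape, coefs.shape)  # (n_alphas + 1,), (n_features, n_alphas + 1)
print(active)                     # features in the order they entered the path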
Ejemplo n.º 59
0
def lars_path(X,
              y,
              Xy=None,
              Gram=None,
              max_iter=500,
              alpha_min=0,
              method='lar',
              copy_X=True,
              eps=np.finfo(np.float64).eps,
              copy_Gram=True,
              verbose=False,
              return_path=True):
    """Compute Least Angle Regression and Lasso path

    The optimization objective for Lasso is::

        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    Parameters
    -----------
    X: array, shape: (n_samples, n_features)
        Input data

    y: array, shape: (n_samples)
        Input targets

    max_iter: integer, optional
        Maximum number of iterations to perform, set to infinity for no limit.

    Gram: None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if 'auto', the Gram
        matrix is precomputed from the given X, if there are more samples
        than features

    alpha_min: float, optional
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method: {'lar', 'lasso'}
        Specifies the returned model. Select 'lar' for Least Angle
        Regression, 'lasso' for the Lasso.

    eps: float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    copy_X: bool
        If False, X is overwritten.

    copy_Gram: bool
        If False, Gram is overwritten.

    Returns
    --------
    alphas: array, shape: (max_features + 1,)
        Maximum of covariances (in absolute value) at each iteration.

    active: array, shape (max_features,)
        Indices of active variables at the end of the path.

    coefs: array, shape (n_features, max_features + 1)
        Coefficients along the path

    See also
    --------
    lasso_path
    LassoLars
    Lars
    LassoLarsCV
    LarsCV
    sklearn.decomposition.sparse_encode

    Notes
    ------
    * http://en.wikipedia.org/wiki/Least-angle_regression

    * http://en.wikipedia.org/wiki/Lasso_(statistics)#LASSO_method
    """

    n_features = X.shape[1]
    n_samples = y.size
    max_features = min(max_iter, n_features)

    if return_path:
        coefs = np.zeros((max_features + 1, n_features))
        alphas = np.zeros(max_features + 1)
    else:
        coef, prev_coef = np.zeros(n_features), np.zeros(n_features)
        alpha, prev_alpha = np.array([0.]), np.array([0.])  # better ideas?

    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X, ))
    solve_cholesky, = get_lapack_funcs(('potrs', ), (X, ))

    if Gram is None:
        if copy_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif Gram == 'auto':
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    elif copy_Gram:
        Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        if verbose > 1:
            print "Step\t\tAdded\t\tDropped\t\tActive set size\t\tC"
        else:
            sys.stdout.write('.')
            sys.stdout.flush()

    tiny = np.finfo(np.float64).tiny  # to avoid division by 0 warning
    tiny32 = np.finfo(np.float32).tiny  # to avoid division by 0 warning

    while True:
        if Cov.size:
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
        else:
            C = 0.

        if return_path:
            alpha = alphas[n_iter, np.newaxis]
            coef = coefs[n_iter]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
            prev_coef = coefs[n_iter - 1]

        alpha[0] = C / n_samples
        if alpha[0] < alpha_min:  # early stopping
            # interpolation factor 0 <= ss < 1
            if n_iter > 0:
                # In the first iteration, all alphas are zero, the formula
                # below would make ss a NaN
                ss = (prev_alpha[0] - alpha_min) / (prev_alpha[0] - alpha[0])
                coef[:] = prev_coef + ss * (coef - prev_coef)
            alpha[0] = alpha_min
            if return_path:
                coefs[n_iter] = coef
            break

        if n_iter >= max_iter or n_active >= n_features:
            break

        if not drop:

            ##########################################################
            # Append x_j to the Cholesky factorization of (Xa * Xa') #
            #                                                        #
            #            ( L   0 )                                   #
            #     L  ->  (       )  , where L * w = Xa' x_j          #
            #            ( w   z )    and z = ||x_j||                #
            #                                                        #
            ##########################################################

            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active])**2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            arrayfuncs.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active])
            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            active.append(indices[n_active])
            n_active += 1

            if verbose > 1:
                print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '',
                                                      n_active, C)
        # least squares solution
        least_squares, info = solve_cholesky(L[:n_active, :n_active],
                                             sign_active[:n_active],
                                             lower=True)

        # is this really needed ?
        AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))

        if not np.isfinite(AA):
            # L is too ill-conditioned
            i = 0
            L_ = L[:n_active, :n_active].copy()
            while not np.isfinite(AA):
                L_.flat[::n_active + 1] += (2**i) * eps
                least_squares, info = solve_cholesky(L_,
                                                     sign_active[:n_active],
                                                     lower=True)
                tmp = max(np.sum(least_squares * sign_active[:n_active]), eps)
                AA = 1. / np.sqrt(tmp)
                i += 1
        least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each inactive variable and
            # the equiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T, least_squares)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
        gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = -coef[active] / (least_squares + tiny32)
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if return_path:
            if n_iter >= coefs.shape[0]:
                del coef, alpha, prev_alpha, prev_coef
                # resize the coefs and alphas array
                add_features = 2 * max(1, (max_features - n_active))
                coefs.resize((n_iter + add_features, n_features))
                alphas.resize(n_iter + add_features)
            coef = coefs[n_iter]
            prev_coef = coefs[n_iter - 1]
            alpha = alphas[n_iter, np.newaxis]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
        else:
            # mimic the effect of incrementing n_iter on the array references
            prev_coef = coef
            prev_alpha[0] = alpha[0]
            coef = np.zeros_like(coef)

        coef[active] = prev_coef[active] + gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx)

            n_active -= 1
            m, n = idx, n_active
            drop_idx = active.pop(idx)

            if Gram is None:
                # propagate dropped variable
                for i in range(idx, n_active):
                    X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                    indices[i], indices[i + 1] = \
                            indices[i + 1], indices[i]  # yeah this is stupid

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active], coef[active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for i in range(idx, n_active):
                    indices[i], indices[i + 1] = \
                                indices[i + 1], indices[i]
                    Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                    Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i], Gram[:,
                                                                       i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas)
                # TODO: will this still work with multiple drops?

                # recompute the covariance; this could probably be done
                # better, and is wrong as Xy is not swapped with the rest
                # of the variables

                # TODO: this could be updated
                residual = y - np.dot(X, coef)
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose > 1:
                print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx,
                                                      n_active, abs(temp))

    if return_path:
        # resize coefs in case of early stop
        alphas = alphas[:n_iter + 1]
        coefs = coefs[:n_iter + 1]

        return alphas, active, coefs.T
    else:
        return alpha, active, coef
Ejemplo n.º 60
0
def py_rect_maxvol(A,
                   tol=1.,
                   maxK=None,
                   min_add_K=None,
                   minK=None,
                   start_maxvol_iters=10,
                   identity_submatrix=True,
                   top_k_index=-1):
    """
    Python implementation of rectangular 2-volume maximization.

    See Also
    --------
    rect_maxvol
    """
    # tol2 - square of parameter tol
    tol2 = tol**2
    # N - number of rows, r - number of columns of matrix A
    N, r = A.shape
    # sanity-check and clamp the parameters
    if N <= r:
        return np.arange(N, dtype=np.int32), np.eye(N, dtype=A.dtype)
    if maxK is None or maxK > N:
        maxK = N
    if maxK < r:
        maxK = r
    if minK is None or minK < r:
        minK = r
    if minK > N:
        minK = N
    if min_add_K is not None:
        minK = max(minK, r + min_add_K)
    if minK > maxK:
        minK = maxK
        #raise ValueError('minK value cannot be greater than maxK value')
    if top_k_index == -1 or top_k_index > N:
        top_k_index = N
    if top_k_index < r:
        top_k_index = r
    # choose initial submatrix and coefficients according to maxvol
    # algorithm
    index = np.zeros(N, dtype=np.int32)
    chosen = np.ones(top_k_index)
    tmp_index, C = py_maxvol(A, 1.05, start_maxvol_iters, top_k_index)
    index[:r] = tmp_index
    chosen[tmp_index] = 0
    C = np.asfortranarray(C)
    # compute squared 2-norms of each row of the coefficient matrix C
    row_norm_sqr = np.array(
        [chosen[i] * np.linalg.norm(C[i], 2)**2 for i in range(top_k_index)])
    # find maximum value in row_norm_sqr
    i = np.argmax(row_norm_sqr)
    K = r
    # use cgeru/zgeru for complex dtypes and sger/dger for real dtypes
    try:
        ger = get_blas_funcs('geru', [C])
    except ValueError:  # real dtypes have 'ger' but no 'geru'
        ger = get_blas_funcs('ger', [C])
    # augment maxvol submatrix with each iteration
    while (row_norm_sqr[i] > tol2 and K < maxK) or K < minK:
        # add i to index and recompute C and the squared row norms
        # via the Sherman-Morrison-Woodbury (SMW) formula
        index[K] = i
        chosen[i] = 0
        c = C[i].copy()
        v = C.dot(c.conj())
        l = 1.0 / (1 + v[i])
        ger(-l, v, c, a=C, overwrite_a=1)
        C = np.hstack([C, l * v.reshape(-1, 1)])
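        # the appended coefficient column keeps the reconstruction
        # A = C.dot(A[index[:K + 1]]) exact after adding row i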
        row_norm_sqr -= (l * v[:top_k_index] * v[:top_k_index].conj()).real
        row_norm_sqr *= chosen
        # find maximum value in row_norm_sqr
        i = row_norm_sqr.argmax()
        K += 1
    # if the identity_submatrix parameter is True, set the submatrix
    # corresponding to the maxvol rows equal to the identity matrix
    if identity_submatrix:
        C[index[:K]] = np.eye(K, dtype=C.dtype)
    return index[:K].copy(), C
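
# Minimal usage sketch (illustrative only, not part of the original example):
# py_rect_maxvol picks K rows (r <= K <= maxK) of a tall matrix that form a
# well-conditioned submatrix, plus coefficients C with A = C.dot(A[piv]) up to
# rounding. Assumes numpy is imported as np and py_maxvol is in scope, exactly
# as the function above already requires.
if __name__ == '__main__':
    A = np.random.rand(1000, 30)                  # tall N x r matrix
    piv, C = py_rect_maxvol(A, tol=1.0, maxK=60)  # select between 30 and 60 rows
    err = np.max(np.abs(A - C.dot(A[piv])))       # reconstruction error
    print('selected %d rows, max error %.2e' % (len(piv), err))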