def test_svd_fixed_precison(self, A, L, eps, rand, lin_op):
        if _IS_32BIT and A.dtype == np.complex_ and rand:
            pytest.xfail("bug in external fortran code")
        A_or_L = A if not lin_op else L

        U, S, V = pymatrixid.svd(A_or_L, eps, rand=rand)
        B = U * S @ V.T.conj()
        assert_allclose(A, B, rtol=eps, atol=1e-08)
Exemple #2
0
def new_space(el, ns, H, set_id, step, wrt_file):

    st = time.time()

    f = h5py.File("spatial.hdf5", 'a')

    raw = f.get('neig_%s' % set_id)[...]
    print "shape of raw data: %s" % str(raw.shape)

    # subtract out the mean feature values from corr_tmp
    raw_mean = np.mean(raw, 0)[None, :]
    raw_tmp = raw - raw_mean

    n_samp = raw_tmp.shape[0]

    # pca = PCA(n_components=20)
    # pca.fit(corr_tmp)
    # ratios = np.round(100*pca.explained_variance_ratio_, 1)
    # msg = "pca explained variance: %s%%" % str(ratios)
    # rr.WP(msg, wrt_file)

    # pca = TruncatedSVD(n_components=20)
    # pca.fit(corr_tmp)
    # ratios = np.round(100*pca.explained_variance_ratio_, 1)
    # msg = "pca explained variance: %s%%" % str(ratios)
    # rr.WP(msg, wrt_file)

    # print "corr_tmp.shape: %s" % str(corr_tmp.shape)
    # print "corr_mean.shape: %s" % str(corr_mean.shape)

    U, S, V = svd(raw_tmp, 100)
    print "V.shape: %s" % str(V.shape)
    f.create_dataset('raw_mean', data=raw_mean)
    f.create_dataset('U', data=U)
    f.create_dataset('S', data=S)
    f.create_dataset('V', data=V)

    print S

    # """check variance after whitening"""
    # V_norm = V/(S[None, :]*np.sqrt(n_samp))
    # corr_tmp_wht = np.dot(corr_tmp, V_norm)
    # tmp_var = np.var(corr_tmp_wht, axis=0)
    # print "variance for each pc axis: %s" % str(tmp_var)
    """calculate percentage explained variance"""
    transform = np.dot(raw_tmp, V)
    exp_var = np.var(transform, axis=0)
    full_var = np.var(raw_tmp, axis=0).sum()
    ratios = np.round(100 * (exp_var / full_var), 1)
    print ratios.sum()
    msg = "pca explained variance: %s%%" % str(ratios)
    rr.WP(msg, wrt_file)
    f.create_dataset('ratios', data=ratios)

    f.close()

    msg = "PCA completed: %ss" % np.round(time.time() - st, 5)
    rr.WP(msg, wrt_file)
Exemple #3
0
def _isvd(x, cutoff=0.0, cutoff_mode=2, max_bond=-1, absorb=0):
    """SVD-decomposition using interpolative matrix random methods. Allows the
    computation of only a certain number of singular values, e.g. max_bond,
    from the get-go, and is thus more efficient. Can also supply
    ``scipy.sparse.linalg.LinearOperator``.
    """
    k = _choose_k(x, cutoff, max_bond)

    if k == 'full':
        if not isinstance(x, np.ndarray):
            x = x.to_dense()
        return _svd(x, cutoff, cutoff_mode, max_bond, absorb)

    U, s, V = sli.svd(x, k)
    V = dag(V)
    return _trim_and_renorm_SVD(U, s, V, cutoff, cutoff_mode, max_bond, absorb)
Exemple #4
0
    def estimate_rank(self, tol):
        """
        Estimate the (low) rank of a noisy matrix via
        hard thresholding of singular values.

        See Gavish & Donoho, 2013.
            The optimal hard threshold for singular values is 4/sqrt(3)
        """
        mat = self.to_hankelet()
        # nrows, ncols = mat.shape
        # beta = nrows / ncols
        # omega = 0.56 * beta ** 3 - 0.95 * beta ** 2 + 1.82 * beta + 1.43
        _, s, _ = sli.svd(mat, min(10, min(mat.shape)))
        # return np.argmin(s > omega * np.median(s))
        eigen = s**2
        diff = np.abs(np.diff(eigen / eigen[0]))
        return np.argmin(diff > tol)
Exemple #5
0
    def check_id(self, dtype):
        # Test ID routines on a Hilbert matrix.

        # set parameters
        n = 300
        eps = 1e-12

        # construct Hilbert matrix
        A = hilbert(n).astype(dtype)
        if np.issubdtype(dtype, np.complexfloating):
            A = A * (1 + 1j)
        L = aslinearoperator(A)

        # find rank
        S = np.linalg.svd(A, compute_uv=False)
        try:
            rank = np.nonzero(S < eps)[0][0]
        except:
            rank = n

        # print input summary
        _debug_print("Hilbert matrix dimension:        %8i" % n)
        _debug_print("Working precision:               %8.2e" % eps)
        _debug_print("Rank to working precision:       %8i" % rank)

        # set print format
        fmt = "%8.2e (s) / %5s"

        # test real ID routines
        _debug_print("-----------------------------------------")
        _debug_print("Real ID routines")
        _debug_print("-----------------------------------------")

        # fixed precision
        _debug_print("Calling iddp_id / idzp_id  ...", )
        t0 = time.clock()
        k, idx, proj = pymatrixid.interp_decomp(A, eps, rand=False)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddp_aid / idzp_aid ...", )
        t0 = time.clock()
        k, idx, proj = pymatrixid.interp_decomp(A, eps)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddp_rid / idzp_rid ...", )
        t0 = time.clock()
        k, idx, proj = pymatrixid.interp_decomp(L, eps)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        # fixed rank
        k = rank

        _debug_print("Calling iddr_id / idzr_id  ...", )
        t0 = time.clock()
        idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddr_aid / idzr_aid ...", )
        t0 = time.clock()
        idx, proj = pymatrixid.interp_decomp(A, k)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddr_rid / idzr_rid ...", )
        t0 = time.clock()
        idx, proj = pymatrixid.interp_decomp(L, k)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        # check skeleton and interpolation matrices
        idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
        P = pymatrixid.reconstruct_interp_matrix(idx, proj)
        B = pymatrixid.reconstruct_skel_matrix(A, k, idx)
        assert_(np.allclose(B, A[:, idx[:k]], eps))
        assert_(np.allclose(B.dot(P), A, eps))

        # test SVD routines
        _debug_print("-----------------------------------------")
        _debug_print("SVD routines")
        _debug_print("-----------------------------------------")

        # fixed precision
        _debug_print("Calling iddp_svd / idzp_svd ...", )
        t0 = time.clock()
        U, S, V = pymatrixid.svd(A, eps, rand=False)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddp_asvd / idzp_asvd...", )
        t0 = time.clock()
        U, S, V = pymatrixid.svd(A, eps)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddp_rsvd / idzp_rsvd...", )
        t0 = time.clock()
        U, S, V = pymatrixid.svd(L, eps)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        # fixed rank
        k = rank

        _debug_print("Calling iddr_svd / idzr_svd ...", )
        t0 = time.clock()
        U, S, V = pymatrixid.svd(A, k, rand=False)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddr_asvd / idzr_asvd ...", )
        t0 = time.clock()
        U, S, V = pymatrixid.svd(A, k)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddr_rsvd / idzr_rsvd ...", )
        t0 = time.clock()
        U, S, V = pymatrixid.svd(L, k)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        # ID to SVD
        idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
        Up, Sp, Vp = pymatrixid.id_to_svd(A[:, idx[:k]], idx, proj)
        B = U.dot(np.diag(S).dot(V.T.conj()))
        assert_(np.allclose(A, B, eps))

        # Norm estimates
        s = svdvals(A)
        norm_2_est = pymatrixid.estimate_spectral_norm(A)
        assert_(np.allclose(norm_2_est, s[0], 1e-6))

        B = A.copy()
        B[:, 0] *= 1.2
        s = svdvals(A - B)
        norm_2_est = pymatrixid.estimate_spectral_norm_diff(A, B)
        assert_(np.allclose(norm_2_est, s[0], 1e-6))

        # Rank estimates
        B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=dtype)
        for M in [A, B]:
            ML = aslinearoperator(M)

            rank_tol = 1e-9
            rank_np = np.linalg.matrix_rank(M, norm(M, 2) * rank_tol)
            rank_est = pymatrixid.estimate_rank(M, rank_tol)
            rank_est_2 = pymatrixid.estimate_rank(ML, rank_tol)

            assert_(rank_est >= rank_np)
            assert_(rank_est <= rank_np + 10)

            assert_(rank_est_2 >= rank_np - 4)
            assert_(rank_est_2 <= rank_np + 4)
Exemple #6
0
 def test_rank_too_large(self):
     # svd(array, k) should not segfault
     a = np.ones((4, 3))
     with assert_raises(ValueError):
         pymatrixid.svd(a, 4)
def my_svd(A, eps_or_k=0.01):
    if A.dtype != np.float64:
        A = A.astype(np.float64)
    U, S, V = svd(A, eps_or_k, rand=False)
    return U, S, V.T
    def check_id(self, dtype):
        # Test ID routines on a Hilbert matrix.

        # set parameters
        n = 300
        eps = 1e-12

        # construct Hilbert matrix
        A = hilbert(n).astype(dtype)
        if np.issubdtype(dtype, np.complexfloating):
            A = A * (1 + 1j)
        L = aslinearoperator(A)

        # find rank
        S = np.linalg.svd(A, compute_uv=False)
        try:
            rank = np.nonzero(S < eps)[0][0]
        except:
            rank = n

        # print input summary
        _debug_print("Hilbert matrix dimension:        %8i" % n)
        _debug_print("Working precision:               %8.2e" % eps)
        _debug_print("Rank to working precision:       %8i" % rank)

        # set print format
        fmt = "%8.2e (s) / %5s"

        # test real ID routines
        _debug_print("-----------------------------------------")
        _debug_print("Real ID routines")
        _debug_print("-----------------------------------------")

        # fixed precision
        _debug_print("Calling iddp_id / idzp_id  ...",)
        t0 = time.clock()
        k, idx, proj = pymatrixid.interp_decomp(A, eps, rand=False)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddp_aid / idzp_aid ...",)
        t0 = time.clock()
        k, idx, proj = pymatrixid.interp_decomp(A, eps)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddp_rid / idzp_rid ...",)
        t0 = time.clock()
        k, idx, proj = pymatrixid.interp_decomp(L, eps)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        # fixed rank
        k = rank

        _debug_print("Calling iddr_id / idzr_id  ...",)
        t0 = time.clock()
        idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddr_aid / idzr_aid ...",)
        t0 = time.clock()
        idx, proj = pymatrixid.interp_decomp(A, k)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddr_rid / idzr_rid ...",)
        t0 = time.clock()
        idx, proj = pymatrixid.interp_decomp(L, k)
        t = time.clock() - t0
        B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        # check skeleton and interpolation matrices
        idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
        P = pymatrixid.reconstruct_interp_matrix(idx, proj)
        B = pymatrixid.reconstruct_skel_matrix(A, k, idx)
        assert_(np.allclose(B, A[:,idx[:k]], eps))
        assert_(np.allclose(B.dot(P), A, eps))

        # test SVD routines
        _debug_print("-----------------------------------------")
        _debug_print("SVD routines")
        _debug_print("-----------------------------------------")

        # fixed precision
        _debug_print("Calling iddp_svd / idzp_svd ...",)
        t0 = time.clock()
        U, S, V = pymatrixid.svd(A, eps, rand=False)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddp_asvd / idzp_asvd...",)
        t0 = time.clock()
        U, S, V = pymatrixid.svd(A, eps)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddp_rsvd / idzp_rsvd...",)
        t0 = time.clock()
        U, S, V = pymatrixid.svd(L, eps)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        # fixed rank
        k = rank

        _debug_print("Calling iddr_svd / idzr_svd ...",)
        t0 = time.clock()
        U, S, V = pymatrixid.svd(A, k, rand=False)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddr_asvd / idzr_asvd ...",)
        t0 = time.clock()
        U, S, V = pymatrixid.svd(A, k)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        _debug_print("Calling iddr_rsvd / idzr_rsvd ...",)
        t0 = time.clock()
        U, S, V = pymatrixid.svd(L, k)
        t = time.clock() - t0
        B = np.dot(U, np.dot(np.diag(S), V.T.conj()))
        _debug_print(fmt % (t, np.allclose(A, B, eps)))
        assert_(np.allclose(A, B, eps))

        # ID to SVD
        idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
        Up, Sp, Vp = pymatrixid.id_to_svd(A[:, idx[:k]], idx, proj)
        B = U.dot(np.diag(S).dot(V.T.conj()))
        assert_(np.allclose(A, B, eps))

        # Norm estimates
        s = svdvals(A)
        norm_2_est = pymatrixid.estimate_spectral_norm(A)
        assert_(np.allclose(norm_2_est, s[0], 1e-6))

        B = A.copy()
        B[:,0] *= 1.2
        s = svdvals(A - B)
        norm_2_est = pymatrixid.estimate_spectral_norm_diff(A, B)
        assert_(np.allclose(norm_2_est, s[0], 1e-6))

        # Rank estimates
        B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=dtype)
        for M in [A, B]:
            ML = aslinearoperator(M)

            rank_np = np.linalg.matrix_rank(M, 1e-9)
            rank_est = pymatrixid.estimate_rank(M, 1e-9)
            rank_est_2 = pymatrixid.estimate_rank(ML, 1e-9)

            assert_(rank_est >= rank_np)
            assert_(rank_est <= rank_np + 10)

            assert_(rank_est_2 >= rank_np)
            assert_(rank_est_2 <= rank_np + 10)
Exemple #9
0
def my_svd(A, eps_or_k=0.01):
    if A.dtype != np.float64:
        A = A.astype(np.float64)
    U, S, V = svd(A, eps_or_k, rand=False)

    return U, S, V.T
 def test_rank_too_large(self):
     # svd(array, k) should not segfault
     a = np.ones((4, 3))
     with assert_raises(ValueError):
         pymatrixid.svd(a, 4)
def reduce(el, ns_set, H, set_id_set, step, wrt_file):

    st = time.time()

    print "H: %s" % H
    n_corr = H**2

    ns_tot = np.sum(ns_set)
    f_master = h5py.File("sve_reduced_all.hdf5", 'w')

    f_master.create_dataset("allcorr", (ns_tot, n_corr * el**3),
                            dtype='complex128')

    # f_master.create_dataset("allcorr",
    #                         (ns_tot, n_corr*el**3),
    #                         dtype='float64')

    allcorr = f_master.get('allcorr')

    f_stats = h5py.File("spatial_stats.hdf5", 'a')

    c = 0
    for ii in xrange(len(set_id_set)):

        tmp = f_stats.get('ff_%s' % set_id_set[ii])[...]
        ff = tmp.reshape(ns_set[ii], n_corr * el**3)

        allcorr[c:c + ns_set[ii], ...] = ff

        c += ns_set[ii]

    msg = "correlations combined"
    rr.WP(msg, wrt_file)

    corr_tmp = allcorr[...]
    # subtract out the mean feature values from corr_tmp
    corr_mean = np.mean(corr_tmp, 0)[None, :]
    corr_tmp = corr_tmp - corr_mean

    n_samp = corr_tmp.shape[0]

    # pca = PCA(n_components=20)
    # pca.fit(corr_tmp)
    # ratios = np.round(100*pca.explained_variance_ratio_, 1)
    # msg = "pca explained variance: %s%%" % str(ratios)
    # rr.WP(msg, wrt_file)

    # pca = TruncatedSVD(n_components=20)
    # pca.fit(corr_tmp)
    # ratios = np.round(100*pca.explained_variance_ratio_, 1)
    # msg = "pca explained variance: %s%%" % str(ratios)
    # rr.WP(msg, wrt_file)

    # print "corr_tmp.shape: %s" % str(corr_tmp.shape)
    # print "corr_mean.shape: %s" % str(corr_mean.shape)

    U, S, V = svd(corr_tmp, 20)
    print "V.shape: %s" % str(V.shape)

    # """check variance after whitening"""
    # V_norm = V/(S[None, :]*np.sqrt(n_samp))
    # corr_tmp_wht = np.dot(corr_tmp, V_norm)
    # tmp_var = np.var(corr_tmp_wht, axis=0)
    # print "variance for each pc axis: %s" % str(tmp_var)
    """calculate percentage explained variance"""
    # X_transformed = np.dot(U, np.diag(S))
    # exp_var = np.var(X_transformed, axis=0)
    exp_var = (S**2) / n_samp
    # full_var = np.var(corr_tmp, axis=0).sum()
    full_var = exp_var.sum()
    ratios = np.round(100 * (exp_var / full_var), 1)
    print ratios.sum()
    msg = "pca explained variance: %s%%" % str(ratios)
    rr.WP(msg, wrt_file)
    f_master.create_dataset('ratios', data=ratios)

    f_red = h5py.File("sve_reduced.hdf5", 'w')

    for ii in xrange(len(set_id_set)):

        ff = f_stats.get('ff_%s' % set_id_set[ii])[...]
        ff = ff.reshape(ns_set[ii], n_corr * el**3)

        # subtract out the mean feature values
        ff_r = ff
        ff_r = ff_r - corr_mean

        # tmp = pca.transform(ff_r)

        # calculate the pc scores for ff_r
        tmp = np.dot(ff_r, V)

        f_red.create_dataset('reduced_%s' % set_id_set[ii],
                             data=tmp,
                             dtype='complex128')

        # f_red.create_dataset('reduced_%s' % set_id_set[ii],
        #                      data=tmp,
        #                      dtype='float64')

    f_red.close()
    f_stats.close()
    f_master.close()

    msg = "PCA completed: %ss" % np.round(time.time() - st, 5)
    rr.WP(msg, wrt_file)