def test_svd_fixed_precison(self, A, L, eps, rand, lin_op):
    """Check that a fixed-precision ``pymatrixid.svd`` reconstructs ``A``.

    The decomposition is computed from ``L`` when ``lin_op`` is truthy and
    from ``A`` otherwise, to precision ``eps``; the product
    ``U * S @ V.T.conj()`` must then match ``A`` with ``rtol=eps`` and
    ``atol=1e-08``.

    The combination "32-bit build, complex128 input, randomized algorithm"
    is marked as an expected failure.
    """
    # np.complex_ was removed in NumPy 2.0; np.complex128 names the same
    # dtype and is available on every NumPy release.
    if _IS_32BIT and A.dtype == np.complex128 and rand:
        pytest.xfail("bug in external fortran code")

    A_or_L = A if not lin_op else L
    U, S, V = pymatrixid.svd(A_or_L, eps, rand=rand)

    # U * S scales the columns of U by the singular values.
    B = U * S @ V.T.conj()
    assert_allclose(A, B, rtol=eps, atol=1e-08)
def new_space(el, ns, H, set_id, step, wrt_file):
    """Decompose one mean-centred data set and store the factors in HDF5.

    Reads dataset ``neig_<set_id>`` from ``spatial.hdf5``, subtracts the
    per-column mean, calls ``svd(raw_tmp, 100)`` and writes ``raw_mean``,
    ``U``, ``S``, ``V`` and the per-component explained-variance
    percentages (``ratios``) back into the same file.

    NOTE(review): ``svd`` is imported elsewhere; the integer argument is
    presumably the requested rank -- confirm against that import.

    Parameters
    ----------
    el, ns, H, step
        Accepted for signature compatibility; not used by this function.
    set_id
        Suffix that selects the input dataset ``neig_<set_id>``.
    wrt_file
        Passed through to ``rr.WP`` together with each status message.
    """
    st = time.time()

    # The context manager guarantees the file is closed even when the
    # decomposition or one of the writes raises.
    with h5py.File("spatial.hdf5", 'a') as f:
        raw = f.get('neig_%s' % set_id)[...]
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("shape of raw data: %s" % str(raw.shape))

        # subtract out the mean feature values
        raw_mean = np.mean(raw, 0)[None, :]
        raw_tmp = raw - raw_mean

        U, S, V = svd(raw_tmp, 100)
        print("V.shape: %s" % str(V.shape))

        f.create_dataset('raw_mean', data=raw_mean)
        f.create_dataset('U', data=U)
        f.create_dataset('S', data=S)
        f.create_dataset('V', data=V)
        print(S)

        # Percentage of explained variance: variance of the data projected
        # onto each column of V, relative to the summed per-feature variance
        # of the centred data.
        transform = np.dot(raw_tmp, V)
        exp_var = np.var(transform, axis=0)
        full_var = np.var(raw_tmp, axis=0).sum()
        ratios = np.round(100 * (exp_var / full_var), 1)
        print(ratios.sum())

        msg = "pca explained variance: %s%%" % str(ratios)
        rr.WP(msg, wrt_file)

        f.create_dataset('ratios', data=ratios)

    msg = "PCA completed: %ss" % np.round(time.time() - st, 5)
    rr.WP(msg, wrt_file)
def _isvd(x, cutoff=0.0, cutoff_mode=2, max_bond=-1, absorb=0):
    """SVD-decomposition using interpolative matrix random methods.

    Only as many singular values as ``_choose_k`` selects from ``cutoff``
    and ``max_bond`` are computed up front, which is cheaper than a full
    decomposition. ``x`` may also be a
    ``scipy.sparse.linalg.LinearOperator``.

    When ``_choose_k`` answers ``'full'`` the work is delegated to ``_svd``;
    a non-ndarray ``x`` is first densified through ``x.to_dense()``.
    """
    rank = _choose_k(x, cutoff, max_bond)

    if rank != 'full':
        left, sing_vals, right = sli.svd(x, rank)
        # The right factor is passed through dag() before trimming.
        return _trim_and_renorm_SVD(left, sing_vals, dag(right),
                                    cutoff, cutoff_mode, max_bond, absorb)

    dense = x if isinstance(x, np.ndarray) else x.to_dense()
    return _svd(dense, cutoff, cutoff_mode, max_bond, absorb)
def estimate_rank(self, tol):
    """Estimate the (low) rank of a noisy matrix from its singular values.

    At most ten singular values of ``self.to_hankelet()`` are computed with
    ``sli.svd``. They are squared and normalised by the largest one, and
    the absolute differences between consecutive entries are compared with
    ``tol``. The result is the index of the first difference that does not
    exceed ``tol``.

    Note that ``np.argmin`` over the boolean mask also yields 0 when every
    difference exceeds ``tol``.

    The hard-threshold rule of Gavish & Donoho (2013) is an alternative
    that is not used here.
    """
    hankel = self.to_hankelet()
    n_values = min(10, *hankel.shape)

    sing_vals = sli.svd(hankel, n_values)[1]

    energy = sing_vals ** 2
    relative_energy = energy / energy[0]
    drops = np.abs(np.diff(relative_energy))

    # argmin of a boolean array returns the position of the first False.
    return np.argmin(drops > tol)
def check_id(self, dtype):
    """Test the ID, SVD, norm and rank routines on a Hilbert matrix.

    A 300 x 300 Hilbert matrix is cast to ``dtype`` (and multiplied by
    ``1 + 1j`` for complex dtypes). Its numerical rank at ``eps = 1e-12``
    is taken from ``np.linalg.svd``. Every ``pymatrixid`` entry point is
    then run in fixed-precision and fixed-rank mode, on the dense array and
    on its ``LinearOperator`` wrapper, and each reconstruction is compared
    with the original via ``np.allclose(A, B, eps)``.

    Parameters
    ----------
    dtype : data-type
        Element type the Hilbert matrix is cast to.
    """
    # time.clock() was removed in Python 3.8; prefer perf_counter and only
    # fall back to clock() on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    # set parameters
    n = 300
    eps = 1e-12

    # construct Hilbert matrix
    A = hilbert(n).astype(dtype)
    if np.issubdtype(dtype, np.complexfloating):
        A = A * (1 + 1j)
    L = aslinearoperator(A)

    # find rank: index of the first singular value below eps
    sing_vals = np.linalg.svd(A, compute_uv=False)
    try:
        rank = np.nonzero(sing_vals < eps)[0][0]
    except IndexError:
        # no singular value is below eps
        rank = n

    # print input summary
    _debug_print("Hilbert matrix dimension: %8i" % n)
    _debug_print("Working precision: %8.2e" % eps)
    _debug_print("Rank to working precision: %8i" % rank)

    # set print format
    fmt = "%8.2e (s) / %5s"

    def timed(label, func, *args, **kwargs):
        # Announce the call, run it, and return (result, elapsed seconds).
        _debug_print(label)
        start = timer()
        result = func(*args, **kwargs)
        return result, timer() - start

    def verify(approx, elapsed):
        # Report and assert that `approx` reproduces A to tolerance eps.
        ok = np.allclose(A, approx, eps)
        _debug_print(fmt % (elapsed, ok))
        assert_(ok)

    # test real ID routines
    _debug_print("-----------------------------------------")
    _debug_print("Real ID routines")
    _debug_print("-----------------------------------------")

    # fixed precision
    for label, operand, kwargs in [
            ("Calling iddp_id / idzp_id ...", A, {'rand': False}),
            ("Calling iddp_aid / idzp_aid ...", A, {}),
            ("Calling iddp_rid / idzp_rid ...", L, {})]:
        (k, idx, proj), t = timed(label, pymatrixid.interp_decomp,
                                  operand, eps, **kwargs)
        verify(pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj),
               t)

    # fixed rank
    k = rank
    for label, operand, kwargs in [
            ("Calling iddr_id / idzr_id ...", A, {'rand': False}),
            ("Calling iddr_aid / idzr_aid ...", A, {}),
            ("Calling iddr_rid / idzr_rid ...", L, {})]:
        (idx, proj), t = timed(label, pymatrixid.interp_decomp,
                               operand, k, **kwargs)
        verify(pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj),
               t)

    # check skeleton and interpolation matrices
    idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
    P = pymatrixid.reconstruct_interp_matrix(idx, proj)
    B = pymatrixid.reconstruct_skel_matrix(A, k, idx)
    assert_(np.allclose(B, A[:, idx[:k]], eps))
    assert_(np.allclose(B.dot(P), A, eps))

    # test SVD routines
    _debug_print("-----------------------------------------")
    _debug_print("SVD routines")
    _debug_print("-----------------------------------------")

    # fixed precision first, then fixed rank (k = rank)
    for label, operand, eps_or_k, kwargs in [
            ("Calling iddp_svd / idzp_svd ...", A, eps, {'rand': False}),
            ("Calling iddp_asvd / idzp_asvd...", A, eps, {}),
            ("Calling iddp_rsvd / idzp_rsvd...", L, eps, {}),
            ("Calling iddr_svd / idzr_svd ...", A, k, {'rand': False}),
            ("Calling iddr_asvd / idzr_asvd ...", A, k, {}),
            ("Calling iddr_rsvd / idzr_rsvd ...", L, k, {})]:
        (U, S, V), t = timed(label, pymatrixid.svd,
                             operand, eps_or_k, **kwargs)
        verify(np.dot(U, np.dot(np.diag(S), V.T.conj())), t)

    # ID to SVD: the reconstruction must use the factors returned by
    # id_to_svd, not the U, S, V left over from the SVD checks.
    idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
    Up, Sp, Vp = pymatrixid.id_to_svd(A[:, idx[:k]], idx, proj)
    B = Up.dot(np.diag(Sp).dot(Vp.T.conj()))
    assert_(np.allclose(A, B, eps))

    # Norm estimates
    s = svdvals(A)
    norm_2_est = pymatrixid.estimate_spectral_norm(A)
    assert_(np.allclose(norm_2_est, s[0], 1e-6))

    B = A.copy()
    B[:, 0] *= 1.2
    s = svdvals(A - B)
    norm_2_est = pymatrixid.estimate_spectral_norm_diff(A, B)
    assert_(np.allclose(norm_2_est, s[0], 1e-6))

    # Rank estimates
    B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=dtype)
    for M in [A, B]:
        ML = aslinearoperator(M)

        rank_tol = 1e-9
        # matrix_rank gets the tolerance scaled by the 2-norm of M.
        rank_np = np.linalg.matrix_rank(M, norm(M, 2) * rank_tol)
        rank_est = pymatrixid.estimate_rank(M, rank_tol)
        rank_est_2 = pymatrixid.estimate_rank(ML, rank_tol)

        assert_(rank_est >= rank_np)
        assert_(rank_est <= rank_np + 10)

        assert_(rank_est_2 >= rank_np - 4)
        assert_(rank_est_2 <= rank_np + 4)
def test_rank_too_large(self):
    """Asking for rank 4 from a 4 x 3 matrix must raise ``ValueError``."""
    # svd(array, k) should not segfault
    ones_4x3 = np.ones((4, 3))
    assert_raises(ValueError, pymatrixid.svd, ones_4x3, 4)
def my_svd(A, eps_or_k=0.01):
    """Run the non-randomized ``svd`` on ``A`` in double precision.

    ``A`` is converted to ``float64`` unless it already has that dtype.
    ``eps_or_k`` is forwarded to ``svd`` unchanged together with
    ``rand=False``.

    Returns
    -------
    tuple
        ``(U, S, V.T)``: the third factor is returned transposed.
    """
    as_double = A if A.dtype == np.float64 else A.astype(np.float64)
    left, sing_vals, right = svd(as_double, eps_or_k, rand=False)
    return left, sing_vals, right.T
def check_id(self, dtype):
    """Test the ID, SVD, norm and rank routines on a Hilbert matrix.

    A 300 x 300 Hilbert matrix is cast to ``dtype`` (and multiplied by
    ``1 + 1j`` for complex dtypes). Its numerical rank at ``eps = 1e-12``
    is taken from ``np.linalg.svd``. Every ``pymatrixid`` entry point is
    then run in fixed-precision and fixed-rank mode, on the dense array and
    on its ``LinearOperator`` wrapper, and each reconstruction is compared
    with the original via ``np.allclose(A, B, eps)``.

    Parameters
    ----------
    dtype : data-type
        Element type the Hilbert matrix is cast to.
    """
    # time.clock() was removed in Python 3.8; prefer perf_counter and only
    # fall back to clock() on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    # set parameters
    n = 300
    eps = 1e-12

    # construct Hilbert matrix
    A = hilbert(n).astype(dtype)
    if np.issubdtype(dtype, np.complexfloating):
        A = A * (1 + 1j)
    L = aslinearoperator(A)

    # find rank: index of the first singular value below eps
    sing_vals = np.linalg.svd(A, compute_uv=False)
    try:
        rank = np.nonzero(sing_vals < eps)[0][0]
    except IndexError:
        # no singular value is below eps
        rank = n

    # print input summary
    _debug_print("Hilbert matrix dimension: %8i" % n)
    _debug_print("Working precision: %8.2e" % eps)
    _debug_print("Rank to working precision: %8i" % rank)

    # set print format
    fmt = "%8.2e (s) / %5s"

    def timed(label, func, *args, **kwargs):
        # Announce the call, run it, and return (result, elapsed seconds).
        _debug_print(label)
        start = timer()
        result = func(*args, **kwargs)
        return result, timer() - start

    def verify(approx, elapsed):
        # Report and assert that `approx` reproduces A to tolerance eps.
        ok = np.allclose(A, approx, eps)
        _debug_print(fmt % (elapsed, ok))
        assert_(ok)

    # test real ID routines
    _debug_print("-----------------------------------------")
    _debug_print("Real ID routines")
    _debug_print("-----------------------------------------")

    # fixed precision
    for label, operand, kwargs in [
            ("Calling iddp_id / idzp_id ...", A, {'rand': False}),
            ("Calling iddp_aid / idzp_aid ...", A, {}),
            ("Calling iddp_rid / idzp_rid ...", L, {})]:
        (k, idx, proj), t = timed(label, pymatrixid.interp_decomp,
                                  operand, eps, **kwargs)
        verify(pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj),
               t)

    # fixed rank
    k = rank
    for label, operand, kwargs in [
            ("Calling iddr_id / idzr_id ...", A, {'rand': False}),
            ("Calling iddr_aid / idzr_aid ...", A, {}),
            ("Calling iddr_rid / idzr_rid ...", L, {})]:
        (idx, proj), t = timed(label, pymatrixid.interp_decomp,
                               operand, k, **kwargs)
        verify(pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj),
               t)

    # check skeleton and interpolation matrices
    idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
    P = pymatrixid.reconstruct_interp_matrix(idx, proj)
    B = pymatrixid.reconstruct_skel_matrix(A, k, idx)
    assert_(np.allclose(B, A[:, idx[:k]], eps))
    assert_(np.allclose(B.dot(P), A, eps))

    # test SVD routines
    _debug_print("-----------------------------------------")
    _debug_print("SVD routines")
    _debug_print("-----------------------------------------")

    # fixed precision first, then fixed rank (k = rank)
    for label, operand, eps_or_k, kwargs in [
            ("Calling iddp_svd / idzp_svd ...", A, eps, {'rand': False}),
            ("Calling iddp_asvd / idzp_asvd...", A, eps, {}),
            ("Calling iddp_rsvd / idzp_rsvd...", L, eps, {}),
            ("Calling iddr_svd / idzr_svd ...", A, k, {'rand': False}),
            ("Calling iddr_asvd / idzr_asvd ...", A, k, {}),
            ("Calling iddr_rsvd / idzr_rsvd ...", L, k, {})]:
        (U, S, V), t = timed(label, pymatrixid.svd,
                             operand, eps_or_k, **kwargs)
        verify(np.dot(U, np.dot(np.diag(S), V.T.conj())), t)

    # ID to SVD: the reconstruction must use the factors returned by
    # id_to_svd, not the U, S, V left over from the SVD checks.
    idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
    Up, Sp, Vp = pymatrixid.id_to_svd(A[:, idx[:k]], idx, proj)
    B = Up.dot(np.diag(Sp).dot(Vp.T.conj()))
    assert_(np.allclose(A, B, eps))

    # Norm estimates
    s = svdvals(A)
    norm_2_est = pymatrixid.estimate_spectral_norm(A)
    assert_(np.allclose(norm_2_est, s[0], 1e-6))

    B = A.copy()
    B[:, 0] *= 1.2
    s = svdvals(A - B)
    norm_2_est = pymatrixid.estimate_spectral_norm_diff(A, B)
    assert_(np.allclose(norm_2_est, s[0], 1e-6))

    # Rank estimates
    B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=dtype)
    for M in [A, B]:
        ML = aslinearoperator(M)

        # The same literal 1e-9 is handed to matrix_rank and to both
        # estimate_rank calls.
        rank_np = np.linalg.matrix_rank(M, 1e-9)
        rank_est = pymatrixid.estimate_rank(M, 1e-9)
        rank_est_2 = pymatrixid.estimate_rank(ML, 1e-9)

        assert_(rank_est >= rank_np)
        assert_(rank_est <= rank_np + 10)

        assert_(rank_est_2 >= rank_np)
        assert_(rank_est_2 <= rank_np + 10)
def reduce(el, ns_set, H, set_id_set, step, wrt_file):
    """Combine per-set correlations, decompose them and store the scores.

    Steps:

    1. Read every ``ff_<set_id>`` dataset from ``spatial_stats.hdf5``,
       reshape it to ``(ns, H**2 * el**3)`` and stack the rows into the
       ``allcorr`` dataset of ``sve_reduced_all.hdf5`` (recreated).
    2. Subtract the per-column mean and call ``svd(corr_tmp, 20)``.
    3. Store the explained-variance percentages as ``ratios``. They are
       normalised by the sum over the returned components only, so they
       describe the split among those components rather than a share of
       the total variance of the data.
    4. For each set, write ``np.dot(ff - mean, V)`` to ``sve_reduced.hdf5``
       (recreated) as ``reduced_<set_id>``.

    NOTE(review): ``svd`` is imported elsewhere; the integer argument is
    presumably the requested rank -- confirm against that import.

    Parameters
    ----------
    el, H
        Size parameters; every sample has ``H**2 * el**3`` features.
    ns_set
        Number of samples in each set, indexed in step with ``set_id_set``.
    set_id_set
        Identifiers used to build the dataset names.
    step
        Accepted for signature compatibility; not used by this function.
    wrt_file
        Passed through to ``rr.WP`` together with each status message.
    """
    st = time.time()

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("H: %s" % H)

    n_corr = H**2
    n_feat = n_corr * el**3
    ns_tot = np.sum(ns_set)

    # Context managers close every file even if a later step raises. They
    # unwind in the order the explicit close() calls used to run:
    # f_red, then f_stats, then f_master.
    with h5py.File("sve_reduced_all.hdf5", 'w') as f_master, \
            h5py.File("spatial_stats.hdf5", 'a') as f_stats:

        f_master.create_dataset("allcorr",
                                (ns_tot, n_feat),
                                dtype='complex128')
        allcorr = f_master.get('allcorr')

        # Stack the sets row-wise; c is the first free row.
        c = 0
        for ii, set_id in enumerate(set_id_set):
            ns = ns_set[ii]
            ff = f_stats.get('ff_%s' % set_id)[...].reshape(ns, n_feat)
            allcorr[c:c + ns, ...] = ff
            c += ns

        msg = "correlations combined"
        rr.WP(msg, wrt_file)

        corr_tmp = allcorr[...]

        # subtract out the mean feature values from corr_tmp
        corr_mean = np.mean(corr_tmp, 0)[None, :]
        corr_tmp = corr_tmp - corr_mean

        n_samp = corr_tmp.shape[0]

        _, S, V = svd(corr_tmp, 20)
        print("V.shape: %s" % str(V.shape))

        # calculate percentage explained variance
        exp_var = (S**2) / n_samp
        full_var = exp_var.sum()
        ratios = np.round(100 * (exp_var / full_var), 1)
        print(ratios.sum())

        msg = "pca explained variance: %s%%" % str(ratios)
        rr.WP(msg, wrt_file)

        f_master.create_dataset('ratios', data=ratios)

        with h5py.File("sve_reduced.hdf5", 'w') as f_red:
            for ii, set_id in enumerate(set_id_set):
                ff = f_stats.get('ff_%s' % set_id)[...]
                ff = ff.reshape(ns_set[ii], n_feat)

                # subtract out the mean feature values
                ff_r = ff - corr_mean

                # calculate the pc scores for ff_r
                tmp = np.dot(ff_r, V)

                f_red.create_dataset('reduced_%s' % set_id,
                                     data=tmp,
                                     dtype='complex128')

    msg = "PCA completed: %ss" % np.round(time.time() - st, 5)
    rr.WP(msg, wrt_file)