def cmp_mat(file1, file2): # Compare two matlab files # ignoring the newlines (lf vs cr-lf) contents1 = sio.loadmat(file1) contents2 = sio.loadmat(file2) # Warning: Cannot guarantee that the data will be the 0th element in this array!! keylist = list(contents1.keys()) for key in keylist: if key.startswith("test_"): data1 = contents1[key] data2 = contents2[key] #if scipy.rank(data1) != scipy.rank(data2): if sli.estimate_rank(data1, eps=1e-10) != sli.estimate_rank( data2, eps=1e-10): print("Data sets in the two files are not of the same rank") return False if data1.shape != data2.shape: print("Data sets in the two files do not have the same shape") return False #print "About to run compare_data()" return compare_data(data1, data2) print("No data to compare!") return True
def betti_number(self, i, eps=None): boundop_i = self.boundary_operator(i) boundop_ip1 = self.boundary_operator(i + 1) if i == 0: boundop_i_rank = 0 else: if eps is None: try: boundop_i_rank = np.linalg.matrix_rank(boundop_i.toarray()) except np.linalg.LinAlgError: boundop_i_rank = boundop_i.shape[1] else: boundop_i_rank = estimate_rank(aslinearoperator(boundop_i), eps) if eps is None: try: boundop_ip1_rank = np.linalg.matrix_rank(boundop_ip1.toarray()) except np.linalg.LinAlgError: boundop_ip1_rank = boundop_ip1.shape[1] else: boundop_ip1_rank = estimate_rank(aslinearoperator(boundop_ip1), eps) return ((boundop_i.shape[1] - boundop_i_rank) - boundop_ip1_rank)
def test_rank_estimates_array(self, A): B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=A.dtype) for M in [A, B]: rank_tol = 1e-9 rank_np = np.linalg.matrix_rank(M, norm(M, 2) * rank_tol) rank_est = pymatrixid.estimate_rank(M, rank_tol) assert_(rank_est >= rank_np) assert_(rank_est <= rank_np + 10)
def test_rank_estimates_lin_op(self, A): B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=A.dtype) for M in [A, B]: ML = aslinearoperator(M) rank_tol = 1e-9 rank_np = np.linalg.matrix_rank(M, norm(M, 2) * rank_tol) rank_est = pymatrixid.estimate_rank(ML, rank_tol) assert_(rank_est >= rank_np - 4) assert_(rank_est <= rank_np + 4)
def check_id(self, dtype): # Test ID routines on a Hilbert matrix. # set parameters n = 300 eps = 1e-12 # construct Hilbert matrix A = hilbert(n).astype(dtype) if np.issubdtype(dtype, np.complexfloating): A = A * (1 + 1j) L = aslinearoperator(A) # find rank S = np.linalg.svd(A, compute_uv=False) try: rank = np.nonzero(S < eps)[0][0] except: rank = n # print input summary _debug_print("Hilbert matrix dimension: %8i" % n) _debug_print("Working precision: %8.2e" % eps) _debug_print("Rank to working precision: %8i" % rank) # set print format fmt = "%8.2e (s) / %5s" # test real ID routines _debug_print("-----------------------------------------") _debug_print("Real ID routines") _debug_print("-----------------------------------------") # fixed precision _debug_print("Calling iddp_id / idzp_id ...", ) t0 = time.clock() k, idx, proj = pymatrixid.interp_decomp(A, eps, rand=False) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddp_aid / idzp_aid ...", ) t0 = time.clock() k, idx, proj = pymatrixid.interp_decomp(A, eps) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddp_rid / idzp_rid ...", ) t0 = time.clock() k, idx, proj = pymatrixid.interp_decomp(L, eps) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) # fixed rank k = rank _debug_print("Calling iddr_id / idzr_id ...", ) t0 = time.clock() idx, proj = pymatrixid.interp_decomp(A, k, rand=False) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddr_aid / idzr_aid ...", ) t0 = time.clock() idx, proj = pymatrixid.interp_decomp(A, k) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddr_rid / idzr_rid ...", ) t0 = time.clock() idx, proj = pymatrixid.interp_decomp(L, k) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) # check skeleton and interpolation matrices idx, proj = pymatrixid.interp_decomp(A, k, rand=False) P = pymatrixid.reconstruct_interp_matrix(idx, proj) B = pymatrixid.reconstruct_skel_matrix(A, k, idx) assert_(np.allclose(B, A[:, idx[:k]], eps)) assert_(np.allclose(B.dot(P), A, eps)) # test SVD routines _debug_print("-----------------------------------------") _debug_print("SVD routines") _debug_print("-----------------------------------------") # fixed precision _debug_print("Calling iddp_svd / idzp_svd ...", ) t0 = time.clock() U, S, V = pymatrixid.svd(A, eps, rand=False) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddp_asvd / idzp_asvd...", ) t0 = time.clock() U, S, V = pymatrixid.svd(A, eps) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddp_rsvd / idzp_rsvd...", ) t0 = time.clock() U, S, V = pymatrixid.svd(L, eps) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) # fixed rank k = rank _debug_print("Calling iddr_svd / idzr_svd ...", ) t0 = time.clock() U, S, V = pymatrixid.svd(A, k, rand=False) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddr_asvd / idzr_asvd ...", ) t0 = time.clock() U, S, V = pymatrixid.svd(A, k) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddr_rsvd / idzr_rsvd ...", ) t0 = time.clock() U, S, V = pymatrixid.svd(L, k) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) # ID to SVD idx, proj = pymatrixid.interp_decomp(A, k, rand=False) Up, Sp, Vp = pymatrixid.id_to_svd(A[:, idx[:k]], idx, proj) B = U.dot(np.diag(S).dot(V.T.conj())) assert_(np.allclose(A, B, eps)) # Norm estimates s = svdvals(A) norm_2_est = pymatrixid.estimate_spectral_norm(A) assert_(np.allclose(norm_2_est, s[0], 1e-6)) B = A.copy() B[:, 0] *= 1.2 s = svdvals(A - B) norm_2_est = pymatrixid.estimate_spectral_norm_diff(A, B) assert_(np.allclose(norm_2_est, s[0], 1e-6)) # Rank estimates B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=dtype) for M in [A, B]: ML = aslinearoperator(M) rank_tol = 1e-9 rank_np = np.linalg.matrix_rank(M, norm(M, 2) * rank_tol) rank_est = pymatrixid.estimate_rank(M, rank_tol) rank_est_2 = pymatrixid.estimate_rank(ML, rank_tol) assert_(rank_est >= rank_np) assert_(rank_est <= rank_np + 10) assert_(rank_est_2 >= rank_np - 4) assert_(rank_est_2 <= rank_np + 4)
for i in range(0, len(sparse_list)): componentMatrix = sparse_list[i] csrForm = componentMatrix.tocsr() normMatrix = onenormest(csrForm, t=3, itmax=5, compute_v=False, compute_w=False) #diffFlag = numpy.allclose(csrForm.data, csrForm.transpose().data) #symmFlag = 'symmetric' if diffFlag else 'not symmetric' eps = pow(10, -9) rank = estimate_rank(aslinearoperator(componentMatrix), eps) #print 'Approximate rank with relative error of(', eps, ')for numerical rank definition = ',rank print obj.data_file_list[i], obj.data_file_name[ i], componentMatrix.shape, ' NonZeros = ', componentMatrix.nnz, ' 1-Norm = ', normMatrix, ' rank ', rank obj.logger_i.info(obj.data_file_list[i] + ' ' + obj.data_file_name[i] + str(componentMatrix.shape) + ' NonZeros = ' + str(componentMatrix.nnz) + ' 1-Norm = ' + str(normMatrix) + ' rank ' + str(rank)) #saving the sparsity pattern fig = plt.figure(figsize=(24.0, 15.0)) fig.clf() fig.gca().add_artist(plt.spy(componentMatrix)) brake.save(obj.output_path + 'dataAnalysis/' + obj.data_file_name[i], ext="png",
def check_id(self, dtype): # Test ID routines on a Hilbert matrix. # set parameters n = 300 eps = 1e-12 # construct Hilbert matrix A = hilbert(n).astype(dtype) if np.issubdtype(dtype, np.complexfloating): A = A * (1 + 1j) L = aslinearoperator(A) # find rank S = np.linalg.svd(A, compute_uv=False) try: rank = np.nonzero(S < eps)[0][0] except: rank = n # print input summary _debug_print("Hilbert matrix dimension: %8i" % n) _debug_print("Working precision: %8.2e" % eps) _debug_print("Rank to working precision: %8i" % rank) # set print format fmt = "%8.2e (s) / %5s" # test real ID routines _debug_print("-----------------------------------------") _debug_print("Real ID routines") _debug_print("-----------------------------------------") # fixed precision _debug_print("Calling iddp_id / idzp_id ...",) t0 = time.clock() k, idx, proj = pymatrixid.interp_decomp(A, eps, rand=False) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddp_aid / idzp_aid ...",) t0 = time.clock() k, idx, proj = pymatrixid.interp_decomp(A, eps) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddp_rid / idzp_rid ...",) t0 = time.clock() k, idx, proj = pymatrixid.interp_decomp(L, eps) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) # fixed rank k = rank _debug_print("Calling iddr_id / idzr_id ...",) t0 = time.clock() idx, proj = pymatrixid.interp_decomp(A, k, rand=False) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddr_aid / idzr_aid ...",) t0 = time.clock() idx, proj = pymatrixid.interp_decomp(A, k) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddr_rid / idzr_rid ...",) t0 = time.clock() idx, proj = pymatrixid.interp_decomp(L, k) t = time.clock() - t0 B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) # check skeleton and interpolation matrices idx, proj = pymatrixid.interp_decomp(A, k, rand=False) P = pymatrixid.reconstruct_interp_matrix(idx, proj) B = pymatrixid.reconstruct_skel_matrix(A, k, idx) assert_(np.allclose(B, A[:,idx[:k]], eps)) assert_(np.allclose(B.dot(P), A, eps)) # test SVD routines _debug_print("-----------------------------------------") _debug_print("SVD routines") _debug_print("-----------------------------------------") # fixed precision _debug_print("Calling iddp_svd / idzp_svd ...",) t0 = time.clock() U, S, V = pymatrixid.svd(A, eps, rand=False) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddp_asvd / idzp_asvd...",) t0 = time.clock() U, S, V = pymatrixid.svd(A, eps) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddp_rsvd / idzp_rsvd...",) t0 = time.clock() U, S, V = pymatrixid.svd(L, eps) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) # fixed rank k = rank _debug_print("Calling iddr_svd / idzr_svd ...",) t0 = time.clock() U, S, V = pymatrixid.svd(A, k, rand=False) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddr_asvd / idzr_asvd ...",) t0 = time.clock() U, S, V = pymatrixid.svd(A, k) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) _debug_print("Calling iddr_rsvd / idzr_rsvd ...",) t0 = time.clock() U, S, V = pymatrixid.svd(L, k) t = time.clock() - t0 B = np.dot(U, np.dot(np.diag(S), V.T.conj())) _debug_print(fmt % (t, np.allclose(A, B, eps))) assert_(np.allclose(A, B, eps)) # ID to SVD idx, proj = pymatrixid.interp_decomp(A, k, rand=False) Up, Sp, Vp = pymatrixid.id_to_svd(A[:, idx[:k]], idx, proj) B = U.dot(np.diag(S).dot(V.T.conj())) assert_(np.allclose(A, B, eps)) # Norm estimates s = svdvals(A) norm_2_est = pymatrixid.estimate_spectral_norm(A) assert_(np.allclose(norm_2_est, s[0], 1e-6)) B = A.copy() B[:,0] *= 1.2 s = svdvals(A - B) norm_2_est = pymatrixid.estimate_spectral_norm_diff(A, B) assert_(np.allclose(norm_2_est, s[0], 1e-6)) # Rank estimates B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=dtype) for M in [A, B]: ML = aslinearoperator(M) rank_np = np.linalg.matrix_rank(M, 1e-9) rank_est = pymatrixid.estimate_rank(M, 1e-9) rank_est_2 = pymatrixid.estimate_rank(ML, 1e-9) assert_(rank_est >= rank_np) assert_(rank_est <= rank_np + 10) assert_(rank_est_2 >= rank_np) assert_(rank_est_2 <= rank_np + 10)
def dense_matrix_rank_estimate(A, tol): return estimate_rank(A, tol)
def matrix_rank(collection: Collection, taglist: List[str]=['all'], region: str='xanes', range: list=[-inf,inf], eps: float=1e-4, kweight: int=2) -> Dataset: """Computes the matrix rank of a collection. Parameters ---------- collection Collection with the groups to compute the rank. taglist List with keys to filter groups based on their ``tags`` attributes in the Collection. The default is ['all']. region XAFS region to compute the rank. Accepted values are 'dxanes', 'xanes', or 'exafs'. The default is 'xanes'. range Domain range in absolute values. Energy units are expected for 'dxanes' or 'xanes', while wavenumber (k) units are expected for 'exafs'. The default is [-:data:`~numpy.inf`, :data:`~numpy.inf`]. eps Relative error for computation of rank. The default is 1e-4. kweight Exponent for weighting chi(k) by k^kweight. Only valid for ``region='exafs'``. The default is 2. Returns ------- : Dataset with the following arguments: - ``rank`` : matrix rank. - ``groupnames`` : list with names of groups. - ``energy`` : array with energy values. Returned only if ``region='xanes`` or ``region=dxanes``. - ``k`` : array with wavenumber values. Returned only if ``region='exafs'``. - ``matrix`` : array with observed values for groups in ``range``. - ``rank_pars`` : dictionary with parameters of calculation. Notes ----- Computation of matrix rank is based on the interpolative decompsition method. See :func:`~scipy.linalg.interpolative.estimate_rank` for further details. Examples -------- >>> from araucaria.testdata import get_testpath >>> from araucaria import Dataset >>> from araucaria.xas import pre_edge, autobk >>> from araucaria.linalg import matrix_rank >>> from araucaria.io import read_collection_hdf5 >>> from araucaria.utils import check_objattrs >>> fpath = get_testpath('Fe_database.h5') >>> collection = read_collection_hdf5(fpath) >>> collection.apply(pre_edge) >>> out = matrix_rank(collection, region='xanes') >>> attrs = ['groupnames', 'energy', 'matrix', 'rank', 'rank_pars'] >>> check_objattrs(out, Dataset, attrs) [True, True, True, True, True] >>> print(out.rank) 4 >>> # rank of exafs matrix >>> collection.apply(autobk) >>> out = matrix_rank(collection, region='exafs', range=[0,10]) >>> attrs = ['groupnames', 'k', 'matrix', 'rank', 'rank_pars'] >>> check_objattrs(out, Dataset, attrs) [True, True, True, True, True] >>> print(out.rank) 4 """ xvals, matrix = get_mapped_data(collection, taglist=taglist, region=region, range=range, kweight=kweight) # condtion number rnk = estimate_rank(matrix, eps=eps) # storing parameters rank_pars = {'region' : region, 'range' : range, 'eps' : eps,} # additional parameters if region == 'exafs': xvar = 'k' # x-variable rank_pars['kweight'] = kweight else: # xanes/dxanes clustering xvar = 'energy' # x-variable # storing cluster results content = {'groupnames' : collection.get_names(taglist=taglist), xvar : xvals, 'matrix' : matrix, 'rank' : rnk, 'rank_pars' : rank_pars,} out = Dataset(**content) return out