def test_eigensystem():
    """Exercise eigensystem() on a small symmetric 4x4 sparse matrix."""
    # COO triplets: symmetric off-diagonal weights, zero diagonal.
    weights = [0.3, 0.6, 0.1, 0.3, 0.1, 0.6, 0.6, 0.1, 0.3, 0.1, 0.6, 0.3]
    row_idx = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
    col_idx = [1, 2, 3, 0, 2, 3, 0, 1, 3, 0, 1, 2]
    spmat = scipy.sparse.coo_matrix(
        (weights, (row_idx, col_idx)), shape=(4, 4)
    )

    # The three eigenvalues of largest magnitude come back, but sorted
    # algebraically (descending) rather than by magnitude.
    u1, s1 = eigensystem(spmat, 3, strip_a0=False)
    assert np.allclose(s1, [1, -0.4, -0.8])

    # Each returned column satisfies A v = lambda v.
    for idx, eigval in enumerate(s1):
        eigvec = u1[:, idx]
        assert np.allclose(spmat.dot(eigvec), eigval * eigvec)

    # The columns form an orthonormal set.
    gram = u1.T.dot(u1)
    assert np.allclose(gram, np.identity(s1.shape[0]))

    # strip_a0=True drops the leading eigenpair and keeps the rest.
    u2, s2 = eigensystem(spmat, 2, strip_a0=True)
    assert np.allclose(s2, s1[1:])
    compare_cols_within_sign(u2, u1[:, 1:])

    # Requesting more eigenvalues than are available is tolerated.
    u3, s3 = eigensystem(spmat, 5)
    assert np.allclose(s3, s1)
    compare_cols_within_sign(u3, u1)
def test_eigensystem():
    """Verify eigenvalues, eigenvectors and a0-stripping of eigensystem()."""
    data = [0.3, 0.6, 0.1, 0.3, 0.1, 0.6, 0.6, 0.1, 0.3, 0.1, 0.6, 0.3]
    coords = (
        [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3],
        [1, 2, 3, 0, 2, 3, 0, 1, 3, 0, 1, 2],
    )
    spmat = scipy.sparse.coo_matrix((data, coords), shape=(4, 4))

    def is_eigenpair(vector, value):
        # True when spmat * vector equals value * vector (within tolerance).
        return np.allclose(spmat.dot(vector), value * vector)

    # Largest-magnitude eigenvalues are selected, then reported in
    # algebraic (descending) order.
    u1, s1 = eigensystem(spmat, 3, strip_a0=False)
    expected_values = [1, -0.4, -0.8]
    assert np.allclose(s1, expected_values)

    # Every column of u1 must be an eigenvector for its eigenvalue.
    for col, value in enumerate(s1):
        assert is_eigenpair(u1[:, col], value)

    # Eigenvectors are orthonormal: U^T U is the identity.
    n_values = s1.shape[0]
    assert np.allclose(u1.T.dot(u1), np.identity(n_values))

    # With strip_a0=True the first eigenpair is the one removed.
    u2, s2 = eigensystem(spmat, 2, strip_a0=True)
    assert np.allclose(s2, s1[1:])
    compare_cols_within_sign(u2, u1[:, 1:])

    # Over-asking (k=5 on a 4x4 matrix) still yields the same result.
    u3, s3 = eigensystem(spmat, 5)
    assert np.allclose(s3, s1)
    compare_cols_within_sign(u3, u1)
def from_matrix(cls, matrix, labels, k, offset_weight=8e-6,
                strip_a0=True, normalize_gm=True):
    """
    Construct an AssocSpace from a SciPy sparse matrix and a LabelSet.

    `k` is the number of dimensions requested; it is forwarded to
    `eigenmath.eigensystem` and formatted with `%d` in the log message.

    `strip_a0` (default True) is forwarded to `eigenmath.eigensystem` and
    asks it to remove the first eigenvector, which is often uninformative.

    `normalize_gm` (default True) rescales the matrix on both sides by
    1 / sqrt(column_sum + offset) before decomposition, where
    offset = offset_weight * (sum of all entries). The offset suppresses
    sufficiently rare terms. Only column sums are computed; they stand in
    for the row sums as well (presumably the matrix is symmetric --
    verify against callers).

    Returns None when `labels` is empty, when no eigenvalues come back,
    or when the first eigenvalue is not positive; otherwise returns
    `cls(u, s, labels)` with `u` and `s` converted to big-endian float32.
    """
    # Nothing to build without labels.
    if not labels:
        return None

    column_sums = matrix.sum(0)
    total = np.sum(column_sums)
    logger.info("Building space with k=%d (sum=%.6f)." % (k, total))

    if normalize_gm:
        size = matrix.shape[0]
        damping = total * offset_weight
        inv_sqrt = 1.0 / np.sqrt(column_sums + damping)
        # Diagonal scaling applied on the left and on the right.
        scaling = spdiags(inv_sqrt, 0, size, size)
        matrix = scaling * matrix * scaling

    u, s = eigenmath.eigensystem(matrix, k=k, strip_a0=strip_a0)

    # Sanity check on the normalization: require at least one eigenvalue
    # and a positive leading one.
    if s.shape[0] == 0 or s[0] <= 0:
        return None

    vectors = np.asarray(u, ">f4")
    values = np.asarray(s, ">f4")
    return cls(vectors, values, labels)