def test_P_i(self, P_i: np.ndarray, method: str):
    """Check GPCCA invariants on ``P_i`` for cluster numbers 2 through 7.

    Cluster numbers for which ``optimize`` raises ``ValueError`` are skipped.
    """
    if method == "krylov":
        # Skip the test when any of these packages cannot be imported.
        for module_name in ("mpi4py", "petsc4py", "slepc4py"):
            pytest.importorskip(module_name)

    gpcca = GPCCA(P_i, eta=None, method=method)

    for n_clusters in range(2, 8):
        try:
            gpcca.optimize(n_clusters)
        except ValueError:
            continue

        schur_vecs = gpcca.schur_vectors
        schur_mat = gpcca.schur_matrix

        # Memberships, coarse-grained rows and distributions must each sum to one.
        assert_allclose(gpcca.memberships.sum(1), 1.0)
        assert_allclose(gpcca.coarse_grained_transition_matrix.sum(1), 1.0)
        assert_allclose(gpcca.coarse_grained_input_distribution.sum(), 1.0)
        if gpcca.coarse_grained_stationary_probability is not None:
            assert_allclose(gpcca.coarse_grained_stationary_probability.sum(), 1.0)

        # The first Schur vector is expected to be constant one.
        np.testing.assert_allclose(schur_vecs[:, 0], 1.0)
        # P @ X and X @ R must span the same subspace.
        largest_angle = np.max(subspace_angles(P_i @ schur_vecs, schur_vecs @ schur_mat))
        assert largest_angle < eps
def test_split_warning_LM(self, P_2: np.ndarray):
    """Check the warnings emitted by ``optimize`` over several ranges with ``z="LM"``."""
    g = GPCCA(P_2, eta=None, z="LM")

    # (cluster-number range passed to ``optimize``, expected warning pattern)
    cases = [
        (
            {"m_min": 2, "m_max": 5},
            "Clustering into 4 clusters will split complex conjugate eigenvalues. "
            "Skipping clustering into 4 clusters.",
        ),
        (
            {"m_min": 5, "m_max": 7},
            "Clustering into 6 clusters will split complex conjugate eigenvalues. "
            "Skipping clustering into 6 clusters.",
        ),
        (
            {"m_min": 8, "m_max": 11},
            "Clustering into 9 clusters will split complex conjugate eigenvalues. "
            "Skipping clustering into 9 clusters.",
        ),
        (
            {"m_min": 11, "m_max": 12},
            "Clustering 12 data points into 12 clusters is always perfectly crisp. "
            "Thus m=12 won't be included in the search for the optimal cluster number.",
        ),
    ]

    for m_range, pattern in cases:
        with pytest.warns(UserWarning, match=pattern):
            g.optimize(m_range)
def test_normal_case_sparse(
    self,
    P: np.ndarray,
    sd: np.ndarray,
    count_sd: np.ndarray,
    count_Pc: np.ndarray,
    count_chi: np.ndarray,
    count_chi_sparse: np.ndarray,
):
    """Compare the sparse krylov result on ``P`` with stored reference values."""
    assert_allclose(sd, count_sd)

    gpcca = GPCCA(csr_matrix(P), eta=sd, method="krylov")
    gpcca.optimize((2, 10))

    coarse = gpcca.coarse_grained_transition_matrix
    assert_allclose(coarse, count_Pc, atol=eps)
    assert_allclose(coarse.sum(1), 1.0)
    assert_allclose(gpcca.coarse_grained_transition_matrix.sum(1), 1.0)
    assert_allclose(gpcca.memberships.sum(1), 1.0)

    # regenerated ground truth memberships
    memberships = gpcca.memberships
    order = _find_permutation(count_chi_sparse, memberships)
    memberships = memberships[:, order]
    assert_allclose(memberships, count_chi_sparse, atol=eps)

    # ground truth memberships from matlab
    order = _find_permutation(count_chi, memberships)
    memberships = memberships[:, order]
    largest_deviation = np.max(np.abs(memberships - count_chi))
    assert largest_deviation < 1e-4
def test_P_2_LR(
    self,
    P_2: np.ndarray,
    minChi_P_2_LR: np.ndarray,
    crispness_values_P_2_LR: np.ndarray,
    optimal_crispness_P_2_LR: np.float64,
    n_m_P_2_LR: np.int64,
    top_eigenvalues_P_2_LR: np.ndarray,
    method: str,
):
    """Check minChi, crispness and eigenvalue results on ``P_2`` with ``z="LR"``."""
    if method == "krylov":
        # Skip the test when any of these packages cannot be imported.
        for module_name in ("mpi4py", "petsc4py", "slepc4py"):
            pytest.importorskip(module_name)

    gpcca = GPCCA(P_2, eta=None, z="LR", method=method)

    # Only very crude checks on minChi are possible, because the initial guess
    # for the rotation matrix, and thus minChi, can vary.
    min_chi = gpcca.minChi(2, 12)
    assert len(min_chi) == len(minChi_P_2_LR)
    for index, lower_bound in ((0, -1e-08), (1, -1e-08), (3, -1e-01), (10, -1e-08)):
        assert min_chi[index] > lower_bound

    gpcca.optimize({"m_min": 2, "m_max": 12})
    n_clusters = gpcca.n_m

    assert_allclose(gpcca.crispness_values, crispness_values_P_2_LR)
    assert_allclose(gpcca.optimal_crispness, optimal_crispness_P_2_LR)
    assert_allclose(n_clusters, n_m_P_2_LR)
    assert_allclose(gpcca.top_eigenvalues, top_eigenvalues_P_2_LR)
    assert_allclose(gpcca.dominant_eigenvalues, top_eigenvalues_P_2_LR[:n_clusters])
def _generate_ground_truth_rot_matrices(self):
    """Helper that generates the data for "test_init_final_rotation_matrix".

    The computed values are only evaluated here, not stored or returned.
    """
    P, sd = get_known_input(mu(0))
    sparse_krylov = GPCCA(csr_matrix(P), method="krylov").optimize(3)
    dense_krylov = GPCCA(P, method="krylov").optimize(3)

    for gpcca in (sparse_krylov, dense_krylov):
        gpcca.schur_vectors
        _initialize_rot_matrix(sd)
        gpcca.rotation_matrix
def test_gpcca_krylov_sparse_eq_dense_mu(self, example_matrix_mu: np.ndarray):
    """Check that sparse krylov, dense krylov and brandts agree on the mu matrices.

    The mu value is read from entry ``[2, 4]`` of the example matrix and selects
    the number of clusters to optimize for. The ``mu == 1000`` case is skipped.
    """
    mu_value = int(example_matrix_mu[2, 4])
    if mu_value == 1000:
        pytest.skip("rtol=0.03359514, atol=3.73976903e+14")
    opt_clust = {0: 3, 10: 3, 50: 3, 100: 3, 200: 2, 500: 2, 1000: 5}[mu_value]

    P, sd = get_known_input(example_matrix_mu)

    g_s = GPCCA(csr_matrix(P), eta=sd, method="krylov").optimize(opt_clust)
    g_d = GPCCA(P, eta=sd, method="krylov").optimize(opt_clust)
    g_b = GPCCA(P, eta=sd, method="brandts").optimize(opt_clust)

    # Only the instance built from a sparse matrix should keep it sparse.
    assert issparse(g_s.transition_matrix)
    assert not issparse(g_d.transition_matrix)
    assert not issparse(g_b.transition_matrix)

    assert_allclose(g_s.memberships.sum(1), 1.0)
    assert_allclose(g_d.memberships.sum(1), 1.0)
    assert_allclose(g_b.memberships.sum(1), 1.0)

    X_k, X_kd, X_b = g_s.schur_vectors, g_d.schur_vectors, g_b.schur_vectors
    RR_k, RR_kd, RR_b = g_s.schur_matrix, g_d.schur_matrix, g_b.schur_matrix

    # check if it's a correct Schur form
    _assert_schur(P, X_k, RR_k, N=None)
    _assert_schur(P, X_kd, RR_kd, N=None)
    _assert_schur(P, X_b, RR_b, N=None)
    # check if they span the same subspace
    assert np.max(subspace_angles(X_k, X_kd)) < eps
    assert np.max(subspace_angles(X_kd, X_b)) < eps

    # Explicit label -> (memberships, coarse-grained matrix) mapping, so the
    # pairwise comparison does not depend on local variable names.
    results = {
        "brandts": (g_b.memberships, g_b.coarse_grained_transition_matrix),
        "dense_krylov": (g_d.memberships, g_d.coarse_grained_transition_matrix),
        "sparse_krylov": (g_s.memberships, g_s.coarse_grained_transition_matrix),
    }

    for left, right in combinations(results, r=2):
        ml, cl = results[left]
        mr, cr = results[right]

        # Align the cluster order of the right result with the left one.
        perm = _find_permutation(ml, mr)

        mr = mr[:, perm]
        assert_allclose(mr, ml, atol=1e-4)

        cr = cr[perm, :][:, perm]
        try:
            assert_allclose(cr, cl, atol=1e-4)
        except AssertionError as e:
            raise RuntimeError(f"Comparing: {left} and {right}.") from e
def test_use_minChi(self):
    """Derive a cluster number from ``minChi`` for several mu values.

    The expected result is 3 for the first five mu values and 7 for the last.
    """
    kmin, kmax = 2, 9
    mu_values = [10, 50, 100, 200, 500, 1000]

    kopt = []
    for mu_ in mu_values:
        P, sd = get_known_input(mu(mu_))
        gpcca = GPCCA(P, eta=sd)
        min_chi = gpcca.minChi(kmin, kmax)
        # Search the interior minChi values in reversed order.
        reversed_interior = np.flipud(min_chi[1:-1])
        best = kmax - 1 - np.argmax(reversed_interior)
        kopt.append(best)

    np.testing.assert_array_equal(kopt, [3] * 5 + [7])
def test_split_warning_LR(self, P_2: np.ndarray):
    """Check the warnings emitted by ``optimize`` over several ranges with ``z="LR"``."""
    g = GPCCA(P_2, eta=None, z="LR")

    # (cluster-number range passed to ``optimize``, expected warning pattern)
    cases = [
        (
            {"m_min": 2, "m_max": 8},
            "Clustering into 7 clusters will split complex conjugate eigenvalues. "
            "Skipping clustering into 7 clusters.",
        ),
        (
            {"m_min": 8, "m_max": 10},
            "Clustering into 9 clusters will split complex conjugate eigenvalues. "
            "Skipping clustering into 9 clusters.",
        ),
        (
            {"m_min": 10, "m_max": 12},
            "Clustering into 11 clusters will split complex conjugate eigenvalues. "
            "Skipping clustering into 11 clusters.",
        ),
    ]

    for m_range, pattern in cases:
        with pytest.warns(UserWarning, match=pattern):
            g.optimize(m_range)
def test_memberships_normal_case_sparse_vs_dense(
    self,
    P: np.ndarray,
    sd: np.ndarray,
    count_sd: np.ndarray,
):
    """Compare dense (default method) and sparse krylov results on ``P``."""
    assert_allclose(sd, count_sd)  # sanity check

    dense = GPCCA(P, eta=sd)
    dense.optimize((2, 10))

    sparse = GPCCA(csr_matrix(P), eta=sd, method="krylov")
    sparse.optimize((2, 10))

    # also passes without this
    chi_sparse = sparse.memberships
    chi_dense = dense.memberships
    coarse_sparse = sparse.coarse_grained_transition_matrix
    coarse_dense = dense.coarse_grained_transition_matrix

    # Align the cluster order of the sparse result with the dense one.
    order = _find_permutation(chi_dense, chi_sparse)

    chi_sparse = chi_sparse[:, order]
    assert_allclose(chi_sparse, chi_dense)

    coarse_sparse = coarse_sparse[order, :][:, order]
    assert_allclose(coarse_sparse, coarse_dense)
def test_split_raise_LM(self, P_2: np.ndarray):
    """Check that ``optimize`` raises for the cluster numbers 4, 6 and 9 with ``z="LM"``."""
    g = GPCCA(P_2, eta=None, z="LM")

    # (requested number of clusters, expected error pattern)
    cases = [
        (
            4,
            "Clustering into 4 clusters will split complex conjugate eigenvalues. "
            "Request one cluster more or less.",
        ),
        (
            6,
            "Clustering into 6 clusters will split complex conjugate eigenvalues. "
            "Request one cluster more or less.",
        ),
        (
            9,
            "Clustering into 9 clusters will split complex conjugate eigenvalues. "
            "Request one cluster more or less.",
        ),
    ]

    for n_clusters, pattern in cases:
        with pytest.raises(ValueError, match=pattern):
            g.optimize(n_clusters)
def test_gpcca_krylov_sparse_eq_dense_count(self, P: np.ndarray, sd: np.ndarray):
    """Check that sparse krylov, dense krylov and brandts agree on the count matrix."""
    # all of them cluster optimally into 3 clusters
    g_s = GPCCA(csr_matrix(P), eta=sd, method="krylov").optimize([2, 5])
    g_d = GPCCA(P, eta=sd, method="krylov").optimize([2, 5])
    g_b = GPCCA(P, eta=sd, method="brandts").optimize([2, 5])

    # Only the instance built from a sparse matrix should keep it sparse.
    assert issparse(g_s.transition_matrix)
    assert not issparse(g_d.transition_matrix)
    assert not issparse(g_b.transition_matrix)

    assert_allclose(g_s.memberships.sum(1), 1.0)
    assert_allclose(g_d.memberships.sum(1), 1.0)
    assert_allclose(g_b.memberships.sum(1), 1.0)

    X_k, X_kd, X_b = g_s.schur_vectors, g_d.schur_vectors, g_b.schur_vectors
    RR_k, RR_kd, RR_b = g_s.schur_matrix, g_d.schur_matrix, g_b.schur_matrix

    # check if it's a correct Schur form
    _assert_schur(P, X_k, RR_k, N=None, subspace=True)
    _assert_schur(P, X_kd, RR_kd, N=None, subspace=True)
    _assert_schur(P, X_b, RR_b, N=None, subspace=True)
    # check if they span the same subspace
    assert np.max(subspace_angles(X_k, X_kd)) < eps
    assert np.max(subspace_angles(X_kd, X_b)) < eps

    # Explicit label -> (memberships, coarse-grained matrix) mapping, so the
    # pairwise comparison does not depend on local variable names.
    results = {
        "brandts": (g_b.memberships, g_b.coarse_grained_transition_matrix),
        "dense_krylov": (g_d.memberships, g_d.coarse_grained_transition_matrix),
        "sparse_krylov": (g_s.memberships, g_s.coarse_grained_transition_matrix),
    }

    for left, right in combinations(results, r=2):
        ml, cl = results[left]
        mr, cr = results[right]

        # Align the cluster order of the right result with the left one.
        perm = _find_permutation(ml, mr)

        mr = mr[:, perm]
        assert_allclose(mr, ml)

        cr = cr[perm, :][:, perm]
        try:
            assert_allclose(cr, cl)
        except AssertionError as e:
            raise RuntimeError(f"Comparing: {left} and {right}.") from e
def test_optimize_range_all_invalid(self, P_2: np.ndarray, mocker):
    """Expect ``optimize`` to raise when the patched core always reports crispness 0.

    ``pygpcca._gpcca._gpcca_core`` is patched to return a fixed
    ``(chi, rotation matrix, crispness)`` triple with a crispness of 0.
    """
    n_states = P_2.shape[0]
    g = GPCCA(P_2, eta=None, z="LR")

    mocker.patch(
        "pygpcca._gpcca._gpcca_core",
        # chi, rot. mat., crispness
        # NOTE: ``np.empty((3, 3))`` gives a 3x3 placeholder; the previous
        # ``np.empty_like((3, 3))`` produced a length-2 array shaped like the tuple.
        return_value=(np.empty((n_states, 3)), np.empty((3, 3)), 0),
    )

    with pytest.raises(
        ValueError,
        match=r"Clustering wasn't successful. Try different cluster numbers.",
    ):
        g.optimize([3, n_states])
def test_gpcca_brandts_sparse_is_not_densified(self, P: np.ndarray, sd: np.ndarray):
    """Expect a ``ValueError`` when a sparse matrix is used with ``method="brandts"``."""
    expected_error = r"Sparse implementation is only available for `method='krylov'`."
    sparse_P = csr_matrix(P)

    with pytest.raises(ValueError, match=expected_error):
        GPCCA(sparse_P, eta=sd, method="brandts").optimize(3)
def test_normal_case(
    self,
    P: np.ndarray,
    sd: np.ndarray,
    count_sd: np.ndarray,
    count_Pc: np.ndarray,
    count_chi: np.ndarray,
):
    """Compare the default-method result on ``P`` with stored reference values."""
    assert_allclose(sd, count_sd)

    gpcca = GPCCA(P, eta=sd)
    gpcca.optimize((2, 10))

    coarse = gpcca.coarse_grained_transition_matrix
    assert_allclose(coarse, count_Pc, atol=eps)
    assert_allclose(coarse.sum(1), 1.0)
    assert_allclose(gpcca.coarse_grained_transition_matrix.sum(1), 1.0)
    assert_allclose(gpcca.memberships.sum(1), 1.0)

    # Memberships are compared through the subspace they span.
    largest_angle = np.max(subspace_angles(gpcca.memberships, count_chi))
    assert largest_angle < eps
def test_transition_matrix_dtype(self, P_2: np.ndarray):
    """The transition matrix exposed by GPCCA is expected to have dtype ``float64``."""
    gpcca = GPCCA(P_2, eta=None, z="LR")
    stored_dtype = gpcca.transition_matrix.dtype
    assert stored_dtype == np.float64
def test_empty_P(self):
    """Expect an ``AssertionError`` when GPCCA is constructed from an empty array."""
    expected_error = r"Expected shape 2 but given array has shape \d+"
    empty_matrix = np.array([])

    with pytest.raises(AssertionError, match=expected_error):
        GPCCA(empty_matrix)
def test_input_distribution_dtype(self, P_2: np.ndarray, eta: Optional[np.ndarray]):
    """The input distribution exposed by GPCCA is expected to have dtype ``float64``."""
    gpcca = GPCCA(P_2, eta=eta, z="LR")
    stored_dtype = gpcca.input_distribution.dtype
    assert stored_dtype == np.float64
def test_non_square_P(self):
    """Expect an ``AssertionError`` when GPCCA is constructed from a 4x3 matrix."""
    expected_error = r"Given array is not uniform: \[\d+ \d+\]"
    non_square = np.random.normal(size=(4, 3))

    with pytest.raises(AssertionError, match=expected_error):
        GPCCA(non_square)
def test_empty_sd(self, P: np.ndarray):
    """Expect a ``ValueError`` when ``eta`` is an empty list."""
    expected_error = r"eta vector length"
    empty_eta = []

    with pytest.raises(ValueError, match=expected_error):
        GPCCA(P, eta=empty_eta)
def test_k_input(self, P: np.ndarray, sd: np.ndarray):
    """Expect ``minChi`` to raise a ``ValueError`` for ``m_min=5`` and ``m_max=3``."""
    expected_error = r"m_min \(5\) must be smaller than m_max \(3\)."
    gpcca = GPCCA(P, eta=sd)

    with pytest.raises(ValueError, match=expected_error):
        gpcca.minChi(m_min=5, m_max=3)
def test_too_small_kkmin(self, P: np.ndarray, sd: np.ndarray):
    """Expect ``minChi`` to raise a ``ValueError`` for ``m_min=0``."""
    expected_error = r"There is no point in clustering into `0` clusters."
    gpcca = GPCCA(P, eta=sd)

    with pytest.raises(ValueError, match=expected_error):
        gpcca.minChi(m_min=0, m_max=10)