def test_assess_dimension_bad_rank():
    """Ranks outside ``[1, n_features - 1]`` must raise ``ValueError``."""
    eigenvalues = np.array([1, 1e-30, 1e-30, 1e-30])
    sample_count = 10
    expected_message = r"should be in \[1, n_features - 1\]"
    # One candidate below the accepted range and one above it.
    for invalid_rank in (0, 5):
        with pytest.raises(ValueError, match=expected_message):
            _assess_dimension(eigenvalues, invalid_rank, sample_count)
def test_assess_dimesion_rank_one():
    """Check ``_assess_dimension`` on the singular values of a rank-one matrix.

    Rank 1 must yield a finite value, while every larger tested rank must
    yield ``-inf``.
    """
    # NOTE(review): another test with this exact name is defined later in this
    # file; the later definition rebinds the name, so only one of the two is
    # collected. Consider removing or renaming one of them.
    n_samples, n_features = 9, 6
    X = np.ones((n_samples, n_features))  # rank 1 matrix
    _, s, _ = np.linalg.svd(X, full_matrices=True)

    # Apart from the first one, all singular values are zero only up to
    # floating-point rounding, so compare with a tolerance rather than
    # requiring the sum to be exactly 0.
    np.testing.assert_allclose(s[1:], np.zeros(n_features - 1), atol=1e-12)

    assert np.isfinite(_assess_dimension(s, rank=1, n_samples=n_samples))
    for rank in range(2, n_features):
        assert _assess_dimension(s, rank, n_samples) == -np.inf
def test_assess_dimesion_rank_one():
    """Check ``_assess_dimension`` on the singular values of a rank-one matrix.

    Only rank 1 should receive a finite value; each larger tested rank should
    evaluate to ``-inf``.
    """
    n_samples, n_features = 9, 6
    # A constant matrix has rank 1.
    rank_one_matrix = np.ones((n_samples, n_features))
    singular_values = np.linalg.svd(rank_one_matrix, full_matrices=True)[1]

    # Everything past the first singular value is zero up to rounding error.
    trailing_values = singular_values[1:]
    assert_allclose(trailing_values, np.zeros(n_features - 1), atol=1e-12)

    rank_one_score = _assess_dimension(
        singular_values, rank=1, n_samples=n_samples
    )
    assert np.isfinite(rank_one_score)

    for candidate_rank in range(2, n_features):
        score = _assess_dimension(singular_values, candidate_rank, n_samples)
        assert score == -np.inf
def test_small_eigenvalues_mle():
    """Ranks that reach into tiny eigenvalues get a log-likelihood of ``-inf``.

    With a single non-negligible eigenvalue, the inferred rank must be 1.
    """
    sample_count = 10
    eigenvalues = np.array([1, 1e-30, 1e-30, 1e-30])

    # Rank 1 only involves the leading eigenvalue, so it is not ruled out.
    leading_score = _assess_dimension(eigenvalues, rank=1, n_samples=sample_count)
    assert leading_score > -np.inf

    for candidate_rank in (2, 3):
        score = _assess_dimension(eigenvalues, candidate_rank, sample_count)
        assert score == -np.inf

    inferred_rank = _infer_dimension(eigenvalues, sample_count)
    assert inferred_rank == 1
def test_assess_dimension_error_rank_greater_than_features():
    """A tested rank above the number of features must raise ``ValueError``.

    Regression test for PR #16224.
    """
    # NOTE(review): this call passes ``n_features`` as a fourth argument while
    # other tests in this file call ``_assess_dimension`` with three; confirm
    # which signature is current.
    eigenvalues = np.array([1, 1e-30, 1e-30, 1e-30])
    sample_count, feature_count = 10, 4
    too_large_rank = 5
    expected_message = (
        "The tested rank cannot exceed "
        "the rank of the dataset"
    )
    with pytest.raises(ValueError, match=expected_message):
        _assess_dimension(eigenvalues, too_large_rank, sample_count, feature_count)
def test_assess_dimension_small_eigenvalues():
    """A rank reaching into tiny eigenvalues must score ``-inf`` (for 'mle').

    Regression test for PR #16224.
    """
    # NOTE(review): this call passes ``n_features`` as a fourth argument while
    # other tests in this file call ``_assess_dimension`` with three; confirm
    # which signature is current.
    eigenvalues = np.array([1, 1e-30, 1e-30, 1e-30])
    sample_count, feature_count = 10, 5
    tested_rank = 3
    score = _assess_dimension(
        eigenvalues, tested_rank, sample_count, feature_count
    )
    assert score == -np.inf
def test_infer_dim_1():
    """Check the rank-2 score is near the best score on noisy rank-one data.

    The data are one random latent column scaled per feature, plus small
    independent noise and a constant per-feature offset. After a full PCA,
    ``_assess_dimension`` is evaluated for ranks 1 to ``n_features - 1``, and
    the value for rank 2 must lie within ``0.01 * n_samples`` of the maximum.
    """
    n_samples, n_features = 1000, 5
    rng = np.random.RandomState(0)

    # Draw order matters for reproducibility: noise first, then the latent column.
    noise = rng.randn(n_samples, n_features) * 0.1
    latent = rng.randn(n_samples, 1)
    loadings = np.array([3, 4, 5, 1, 2])
    offset = np.array([1, 0, 7, 4, 6])
    X = noise + latent * loadings + offset

    pca = PCA(n_components=n_features, svd_solver="full")
    pca.fit(X)
    spectrum = pca.explained_variance_

    scores = np.array(
        [
            _assess_dimension(spectrum, candidate_rank, n_samples)
            for candidate_rank in range(1, n_features)
        ]
    )
    # scores[1] is the value for rank 2 because ranks start at 1.
    tolerance = 0.01 * n_samples
    assert scores[1] > scores.max() - tolerance