Ejemplo n.º 1
0
def test_assess_dimension_bad_rank():
    """Check that a rank outside [1, n_features - 1] raises ValueError."""
    eigenvalues = np.array([1, 1e-30, 1e-30, 1e-30])
    sample_count = 10
    expected_message = r"should be in \[1, n_features - 1\]"

    # One candidate below the accepted range (0) and one above it (5, which
    # exceeds the length of the 4-value spectrum).
    invalid_ranks = (0, 5)
    for bad_rank in invalid_ranks:
        with pytest.raises(ValueError, match=expected_message):
            _assess_dimension(eigenvalues, bad_rank, sample_count)
Ejemplo n.º 2
0
def test_assess_dimesion_rank_one():
    """Check ``_assess_dimension`` on the spectrum of a rank-1 matrix.

    An all-ones matrix has a single non-zero singular value. The test
    expects a finite score for ``rank=1`` and ``-inf`` for every rank from
    2 up to ``n_features - 1``.
    """
    n_samples, n_features = 9, 6
    X = np.ones((n_samples, n_features))  # rank 1 matrix
    _, s, _ = np.linalg.svd(X, full_matrices=True)
    # Except for the first one, all singular values are 0 -- but only up to
    # floating-point round-off, so compare with an absolute tolerance rather
    # than requiring the sum to be exactly 0 (which is platform-dependent).
    assert np.allclose(s[1:], 0, atol=1e-12)

    assert np.isfinite(_assess_dimension(s, rank=1, n_samples=n_samples))
    for rank in range(2, n_features):
        assert _assess_dimension(s, rank, n_samples) == -np.inf
Ejemplo n.º 3
0
def test_assess_dimesion_rank_one():
    """Check ``_assess_dimension`` on the spectrum of a rank-1 matrix.

    The score for ``rank=1`` must be finite, while every rank from 2 up to
    ``n_features - 1`` must score ``-inf``.
    """
    n_samples, n_features = 9, 6
    all_ones = np.ones((n_samples, n_features))  # a rank-1 matrix
    singular_values = np.linalg.svd(all_ones, full_matrices=True)[1]

    # Sanity check on the fixture: everything past the leading singular
    # value is zero, up to floating-point error.
    trailing_zeros = np.zeros(n_features - 1)
    assert_allclose(singular_values[1:], trailing_zeros, atol=1e-12)

    rank_one_score = _assess_dimension(
        singular_values, rank=1, n_samples=n_samples
    )
    assert np.isfinite(rank_one_score)

    for higher_rank in range(2, n_features):
        score = _assess_dimension(singular_values, higher_rank, n_samples)
        assert score == -np.inf
Ejemplo n.º 4
0
def test_small_eigenvalues_mle():
    """Check that ranks backed only by tiny eigenvalues score ``-inf``.

    With one dominant eigenvalue and three values of 1e-30, rank 1 must get
    a score above ``-inf``, ranks 2 and 3 must get ``-inf``, and the
    inferred dimension must be 1.
    """
    eigenvalues = np.array([1, 1e-30, 1e-30, 1e-30])
    sample_count = 10

    rank_one_score = _assess_dimension(
        eigenvalues, rank=1, n_samples=sample_count
    )
    assert rank_one_score > -np.inf

    for degenerate_rank in (2, 3):
        score = _assess_dimension(eigenvalues, degenerate_rank, sample_count)
        assert score == -np.inf

    inferred_rank = _infer_dimension(eigenvalues, sample_count)
    assert inferred_rank == 1
Ejemplo n.º 5
0
def test_assess_dimension_error_rank_greater_than_features():
    """Check that a rank larger than the feature count raises ValueError.

    Regression test for PR #16224.
    """
    eigenvalues = np.array([1, 1e-30, 1e-30, 1e-30])
    sample_count, feature_count = 10, 4
    oversized_rank = 5  # deliberately larger than feature_count
    expected_message = ("The tested rank cannot exceed "
                        "the rank of the dataset")

    with pytest.raises(ValueError, match=expected_message):
        _assess_dimension(
            eigenvalues, oversized_rank, sample_count, feature_count
        )
Ejemplo n.º 6
0
def test_assess_dimension_small_eigenvalues():
    """Check that a rank resting on tiny eigenvalues scores ``-inf``.

    Regression test for PR #16224 (handling of tiny eigenvalues with 'mle').
    """
    eigenvalues = np.array([1, 1e-30, 1e-30, 1e-30])
    sample_count, feature_count = 10, 5
    tested_rank = 3

    score = _assess_dimension(
        eigenvalues, tested_rank, sample_count, feature_count
    )
    assert score == -np.inf
Ejemplo n.º 7
0
def test_infer_dim_1():
    """Check the rank-2 score is close to the best score on synthetic data.

    The data are built from small Gaussian noise (scale 0.1), plus a single
    random column broadcast across features with per-feature scales, plus a
    constant per-feature offset. A full-SVD PCA is fitted, each candidate
    rank in ``[1, n_features - 1]`` is scored with ``_assess_dimension``, and
    the score at index 1 (rank 2) must lie within ``0.01 * n_samples`` of
    the maximum score.
    """
    n_samples, n_features = 1000, 5
    rng = np.random.RandomState(0)

    # The two draws must stay in this order (noise, then shared factor) so
    # the seeded generator yields the same data.
    noise = rng.randn(n_samples, n_features) * 0.1
    shared_factor = rng.randn(n_samples, 1) * np.array([3, 4, 5, 1, 2])
    offsets = np.array([1, 0, 7, 4, 6])
    X = noise + shared_factor + offsets

    pca = PCA(n_components=n_features, svd_solver="full")
    pca.fit(X)
    spectrum = pca.explained_variance_

    candidate_ranks = range(1, n_features)
    log_likelihoods = np.array(
        [_assess_dimension(spectrum, rank, n_samples)
         for rank in candidate_ranks]
    )
    tolerance = 0.01 * n_samples
    assert log_likelihoods[1] > log_likelihoods.max() - tolerance