Example #1
import numpy as np

from sklearn.decomposition import PCA
# _assess_dimension_ and assert_greater are private/testing helpers that only
# exist in older scikit-learn releases; the import paths assume such a version
from sklearn.decomposition.pca import _assess_dimension_
from sklearn.utils.testing import assert_greater


def test_infer_dim_1():
    # X is one latent factor with loadings [3, 4, 5, 1, 2], plus isotropic
    # noise and a constant offset, so the assessed dimensionality should be 1
    n, p = 1000, 5
    rng = np.random.RandomState(0)
    X = (rng.randn(n, p) * .1 + rng.randn(n, 1) * np.array([3, 4, 5, 1, 2]) +
         np.array([1, 0, 7, 4, 6]))
    pca = PCA(n_components=p, svd_solver='full')
    pca.fit(X)
    spect = pca.explained_variance_
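    # Minka-style marginal log-likelihood of each candidate dimensionality k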
    ll = np.array([_assess_dimension_(spect, k, n, p) for k in range(p)])
    assert_greater(ll[1], ll.max() - .01 * n)
Example #2
import numpy as np

from sklearn.decomposition import PCA
# private/testing helpers from older scikit-learn releases (assumed paths)
from sklearn.decomposition.pca import _assess_dimension_
from sklearn.utils.testing import assert_greater


def test_infer_dim_1():
    """Check that the assessed dimensionality of a noisy rank-1 matrix is 1.

    X is one latent factor with loadings [3, 4, 5, 1, 2], plus isotropic
    noise and a constant offset.
    """
    n, p = 1000, 5
    rng = np.random.RandomState(0)
    X = (rng.randn(n, p) * 0.1 + rng.randn(n, 1) * np.array([3, 4, 5, 1, 2]) +
         np.array([1, 0, 7, 4, 6]))
    pca = PCA(n_components=p)
    pca.fit(X)
    spect = pca.explained_variance_
    ll = []
    for k in range(p):
        ll.append(_assess_dimension_(spect, k, n, p))
    ll = np.array(ll)
    assert_greater(ll[1], ll.max() - 0.01 * n)
Example #3
import numpy as np
import dask.array as da
# `dd` is assumed to alias dask_ml.decomposition, whose PCA mirrors sklearn's
import dask_ml.decomposition as dd
# private helper from older scikit-learn releases (assumed path)
from sklearn.decomposition.pca import _assess_dimension_


def test_infer_dim_1():
    # same rank-1-plus-noise setup as above, but fitted with dask-ml's PCA
    # on a dask array instead of scikit-learn's in-memory PCA
    n, p = 1000, 5
    rng = np.random.RandomState(0)
    X = (rng.randn(n, p) * 0.1 + rng.randn(n, 1) * np.array([3, 4, 5, 1, 2]) +
         np.array([1, 0, 7, 4, 6]))
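    # wrap X as a single-chunk dask array so the dask-ml estimator can fit it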
    X = da.from_array(X, chunks=(n, p))
    pca = dd.PCA(n_components=p, svd_solver="full")
    pca.fit(X)
    spect = pca.explained_variance_
    ll = []
    for k in range(p):
        ll.append(_assess_dimension_(spect, k, n, p))
    ll = np.array(ll)
    assert ll[1] > ll.max() - 0.01 * n
Example #4
    def get_space(self):
        # module-level imports assumed: numpy as np, torch,
        # sklearn.decomposition.TruncatedSVD, sklearn.utils.extmath.randomized_svd,
        # and the private scikit-learn helper _assess_dimension_
        cov_mat_sqrt_np = self.cov_mat_sqrt.clone().cpu().numpy()

        # scale so that the SVD of the deviation matrix corresponds to PCA of
        # the sample covariance D D' / (rank - 1)
        cov_mat_sqrt_np /= (max(1, self.rank.item() - 1)) ** 0.5

        if self.pca_rank == 'mle':
            pca_rank = self.rank.item()
        else:
            pca_rank = self.pca_rank

        pca_rank = max(1, min(pca_rank, self.rank.item()))
        # NOTE: this fitted TruncatedSVD is never used; the decomposition
        # consumed below comes from randomized_svd
        pca_decomp = TruncatedSVD(n_components=pca_rank)
        pca_decomp.fit(cov_mat_sqrt_np)

        # top `pca_rank` singular values (s) and right singular vectors (Vt)
        _, s, Vt = randomized_svd(cov_mat_sqrt_np,
                                  n_components=pca_rank,
                                  n_iter=5)

        # perform post-selection fitting
        if self.pca_rank == 'mle':
            eigs = s**2.0
            ll = np.zeros(len(eigs))
            correction = np.zeros(len(eigs))

            # compute Minka's PCA marginal log-likelihood plus a BIC-style
            # correction term for each candidate rank
            for rank in range(len(eigs)):
                # m is the number of free parameters of a rank-`rank` model;
                # the correction penalizes it by 0.5 * m * log(n_samples)
                m = cov_mat_sqrt_np.shape[1] * rank - rank * (rank + 1) / 2.
                correction[rank] = 0.5 * m * np.log(cov_mat_sqrt_np.shape[0])
                ll[rank] = _assess_dimension_(
                    spectrum=eigs,
                    rank=rank,
                    n_features=min(cov_mat_sqrt_np.shape),
                    n_samples=max(cov_mat_sqrt_np.shape))

            self.ll = ll
            self.corrected_ll = ll - correction
            self.pca_rank = np.nanargmax(self.corrected_ll)
            print('PCA Rank is: ', self.pca_rank)
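            # return the selected subspace: the top singular directions
            # scaled by their singular values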
            return torch.FloatTensor(s[:self.pca_rank, None] *
                                     Vt[:self.pca_rank, :])
        else:
            return torch.FloatTensor(s[:, None] * Vt)
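
All of the examples above follow the same pattern: compute the explained-variance spectrum, score every candidate rank with _assess_dimension_, and keep the rank with the highest (possibly corrected) log-likelihood. A minimal standalone sketch of that pattern, assuming an older scikit-learn release that still exposes the private helper under sklearn.decomposition.pca:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.decomposition.pca import _assess_dimension_  # private, older sklearn

rng = np.random.RandomState(0)
n_samples, n_features = 500, 6
# rank-2 signal plus a little isotropic noise
X = rng.randn(n_samples, 2) @ rng.randn(2, n_features)
X += 0.05 * rng.randn(n_samples, n_features)

spectrum = PCA(svd_solver='full').fit(X).explained_variance_
ll = np.array([_assess_dimension_(spectrum, k, n_samples, n_features)
               for k in range(n_features)])
print('inferred dimensionality:', ll.argmax())  # should recover 2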