def test_whitening(n_individual_components, prewhiten, multiview_output):
    """Check that whitened GroupPCA output has unit variance.

    Low-rank, correlated data is generated, its columns are split into
    three views, and the following is verified:

    * with ``whiten=True``, ``fit_transform`` and ``transform`` agree;
    * the output has the expected shape and, in the single-output case,
      every component has unit sample standard deviation;
    * with ``whiten=False``, the transformed output has the same shape.

    Parameters
    ----------
    n_individual_components, prewhiten, multiview_output
        Forwarded unchanged to the ``GroupPCA`` constructor.
        NOTE(review): presumably supplied by pytest parametrization or
        fixtures defined outside this block -- confirm.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 80
    n_components = 30
    rank = 50
    n_views = 3

    # some low rank data with correlated features
    latent = rng.randn(n_samples, rank)
    mixing = np.dot(
        np.diag(np.linspace(10.0, 1.0, rank)),
        rng.randn(rank, n_features),
    )
    X = np.dot(latent, mixing)
    # inflate the first 50 features so the per-feature spread differs a lot
    # from the remaining 30 features
    X[:, :50] *= 3

    assert X.shape == (n_samples, n_features)

    # the component-wise variance is thus highly varying:
    assert X.std(axis=0).std() > 43.8

    Xs = np.array_split(X, n_views, axis=1)
    # list.copy() is shallow and would hand the estimator the very same
    # arrays; copy each view so the originals are kept across iterations.
    Xs_ = [view.copy() for view in Xs]
    gpca = GroupPCA(
        n_components=n_components,
        whiten=True,
        prewhiten=prewhiten,
        random_state=0,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
    )
    # test fit_transform
    X_whitened = gpca.fit_transform(Xs_)
    X_whitened2 = gpca.transform(Xs_)
    assert_allclose(X_whitened, X_whitened2, rtol=5e-4)
    if multiview_output:
        assert len(X_whitened) == n_views
        for view_whitened in X_whitened:
            assert view_whitened.shape == (n_samples, n_components)
    else:
        assert X_whitened.shape == (n_samples, n_components)
        # whitening must leave every component with unit sample std
        assert_allclose(
            X_whitened.std(ddof=1, axis=0), np.ones(n_components)
        )

    # same pipeline without whitening: only the output shape is checked
    Xs_ = [view.copy() for view in Xs]
    gpca = GroupPCA(
        n_components=n_components,
        whiten=False,
        prewhiten=prewhiten,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
        random_state=rng,
    ).fit(Xs)
    X_unwhitened = gpca.transform(Xs_)
    if multiview_output:
        assert len(X_unwhitened) == n_views
        for view_unwhitened in X_unwhitened:
            assert view_unwhitened.shape == (n_samples, n_components)
    else:
        assert X_unwhitened.shape == (n_samples, n_components)
def test_pca(n_components, n_individual_components, multiview_output):
    """Check GroupPCA output shapes and fit/transform consistency.

    Three views of standard-normal data with 6, 4 and 5 features are
    drawn. The test verifies the number of rows (and, when
    ``n_components`` is given, the number of columns) of the transformed
    output, then checks that ``fit_transform`` matches a subsequent
    ``transform`` on the same data.

    Parameters
    ----------
    n_components, n_individual_components, multiview_output
        Forwarded unchanged to the ``GroupPCA`` constructor.
        NOTE(review): presumably supplied by pytest parametrization or
        fixtures defined outside this block -- confirm.
    """
    n_samples = 100
    view_widths = [6, 4, 5]
    rng = np.random.RandomState(0)

    Xs = []
    for width in view_widths:
        mean = np.zeros(width)
        cov = np.eye(width)
        Xs.append(rng.multivariate_normal(mean, cov, size=n_samples))

    gpca = GroupPCA(
        n_components=n_components,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
    )

    # check the shape of fit.transform
    projected = gpca.fit(Xs).transform(Xs)
    if multiview_output:
        assert len(projected) == 3
        outputs = projected
    else:
        # wrap the single output so both modes share the checks below
        outputs = [projected]
    for out in outputs:
        assert out.shape[0] == n_samples
        if n_components is not None:
            assert out.shape[1] == n_components

    # check the equivalence of fit.transform and fit_transform
    from_fit_transform = gpca.fit_transform(Xs)
    from_transform = gpca.transform(Xs)
    assert_allclose(from_transform, from_fit_transform)
def test_grouppca_inverse(
    n_individual_components, prewhiten, whiten, multiview_output
):
    """Check that ``inverse_transform`` undoes ``transform``.

    Two nearly identical views are built from spherical data whose middle
    column is scaled down to be almost negligible and whose mean is large.
    After fitting a two-component ``GroupPCA``, the round trip
    ``inverse_transform(transform(Xs))`` must reproduce every view within
    an absolute tolerance of ``1e-4``.

    Parameters
    ----------
    n_individual_components, prewhiten, whiten, multiview_output
        Forwarded unchanged to the ``GroupPCA`` constructor.
        NOTE(review): presumably supplied by pytest parametrization or
        fixtures defined outside this block -- confirm.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 50, 3

    base = rng.randn(n_samples, n_features)  # spherical data
    base[:, 1] *= 0.00001  # make middle component relatively small
    base += [5, 4, 3]  # make a large mean

    # second view: same data with tiny extra noise on the middle column
    noisy = np.copy(base)
    noisy[:, 1] += rng.randn(n_samples) * 0.00001

    views = [base, noisy]
    estimator = GroupPCA(
        n_components=2,
        prewhiten=prewhiten,
        whiten=whiten,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
    )
    fitted = estimator.fit(views)

    projected = fitted.transform(views)
    reconstructed = fitted.inverse_transform(projected)

    assert len(reconstructed) == len(views)
    for expected, estimate in zip(views, reconstructed):
        assert_allclose(expected, estimate, atol=1e-4)
def test_grouppca_inverse_index(
    n_individual_components,
    prewhiten,
    whiten,
    multiview_output,
    index,
    inverse_index,
):
    """Check the ``transform`` / ``inverse_transform`` round trip with ``index``.

    A two-component ``GroupPCA`` is fitted on three views. The views
    selected by ``index`` are transformed, and the result is inverted with
    ``inverse_index``:

    * multiview output with a different number of views selected on each
      side: ``inverse_transform`` must raise ``AssertionError``;
    * multiview output with the same count but different selections:
      nothing further is checked;
    * otherwise: the reconstruction must match the views selected by
      ``inverse_index`` within an absolute tolerance of ``1e-4``.

    Parameters
    ----------
    n_individual_components, prewhiten, whiten, multiview_output
        Forwarded unchanged to the ``GroupPCA`` constructor.
    index
        Passed to ``transform``; ``None`` selects all three views.
    inverse_index
        Passed to ``inverse_transform``; ``None`` selects all three views.

    NOTE(review): all arguments are presumably supplied by pytest
    parametrization or fixtures defined outside this block -- confirm.
    """
    rng = np.random.RandomState(0)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= 0.00001  # make middle component relatively small
    X += [5, 4, 3]  # make a large mean

    def _mixed_view():
        # Copy X, jitter its middle column, then mix the columns with a
        # random square matrix.
        view = np.copy(X)
        view[:, 1] += rng.randn(n) * 0.00001
        return view.dot(rng.rand(p, p))

    X2 = _mixed_view()
    X3 = _mixed_view()
    Xs = [X, X2, X3]

    gpca = GroupPCA(
        n_components=2,
        prewhiten=prewhiten,
        whiten=whiten,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
    ).fit(Xs)

    def _select(which):
        # Return the requested views together with how many were selected.
        if which is None:
            return np.copy(Xs), 3
        positions = np.atleast_1d(which)
        return [Xs[i] for i in positions], len(positions)

    Xs_transform, n_selected = _select(index)
    Xs_inverse, n_selected_inverse = _select(inverse_index)

    Y = gpca.transform(Xs_transform, index=index)

    if multiview_output and n_selected != n_selected_inverse:
        # mismatched numbers of views cannot be inverted
        with pytest.raises(AssertionError):
            gpca.inverse_transform(Y, index=inverse_index)
        return
    if multiview_output and index != inverse_index:
        # same count but different views: no check is made
        return

    Y_inverse = gpca.inverse_transform(Y, index=inverse_index)
    for expected, estimate in zip(Xs_inverse, Y_inverse):
        assert_allclose(expected, estimate, atol=1e-4)