def test_pca(n_components, n_individual_components, multiview_output):
    """Check GroupPCA output shapes and fit/transform consistency.

    The three arguments are forwarded unchanged to ``GroupPCA``; they are
    presumably supplied by a pytest parametrization that is not visible
    in this part of the file.

    Two properties are verified on three random views of 100 samples with
    6, 4 and 5 features:

    * the transformed data has one row per sample and, when
      ``n_components`` is given, ``n_components`` columns (per view when
      ``multiview_output`` is true);
    * ``fit_transform(Xs)`` agrees with a subsequent ``transform(Xs)``.
    """
    n_samples = 100
    view_widths = [6, 4, 5]
    rng = np.random.RandomState(0)
    Xs = [
        rng.multivariate_normal(np.zeros(width), np.eye(width), size=n_samples)
        for width in view_widths
    ]

    gpca = GroupPCA(
        n_components=n_components,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
    )

    # Shape of fit(...).transform(...)
    projected = gpca.fit(Xs).transform(Xs)
    if multiview_output:
        # One output per input view.
        assert len(projected) == 3
        outputs = projected
    else:
        # A single array: wrap it so both cases share the checks below.
        outputs = [projected]

    for out in outputs:
        assert out.shape[0] == n_samples
        if n_components is not None:
            assert out.shape[1] == n_components

    # fit_transform must match transform applied after that same fit.
    from_fit_transform = gpca.fit_transform(Xs)
    from_transform = gpca.transform(Xs)
    assert_allclose(from_transform, from_fit_transform)
def test_whitening(n_individual_components, prewhiten, multiview_output):
    """Check that whitened GroupPCA output has unit variance.

    ``n_individual_components``, ``prewhiten`` and ``multiview_output`` are
    forwarded unchanged to ``GroupPCA``; they are presumably supplied by a
    pytest parametrization that is not visible in this part of the file.

    A low-rank (rank 50) matrix of 100 samples x 80 features with strongly
    varying per-feature scale is split column-wise into three views. The
    test then checks that:

    * with ``whiten=True``, ``fit_transform`` and ``transform`` agree, the
      output has the expected shape, and (single-array output only) every
      component has unit sample standard deviation;
    * with ``whiten=False``, the output has the expected shape.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 80
    n_components = 30
    rank = 50
    n_views = 3

    # Some low rank data with correlated features.
    X = np.dot(
        rng.randn(n_samples, rank),
        np.dot(
            np.diag(np.linspace(10.0, 1.0, rank)),
            rng.randn(rank, n_features),
        ),
    )
    # Scale the first 50 features by 3 so that the per-feature spread
    # differs markedly between the two groups of columns.
    X[:, :50] *= 3

    assert X.shape == (n_samples, n_features)

    # The component-wise standard deviation is thus highly varying:
    assert X.std(axis=0).std() > 43.8

    Xs = np.array_split(X, n_views, axis=1)

    # Copy each view's data. ``list.copy()`` would only duplicate the list
    # and still share the underlying arrays, so an in-place modification by
    # the estimator would leak into ``Xs``.
    Xs_ = [view.copy() for view in Xs]

    gpca = GroupPCA(
        n_components=n_components,
        whiten=True,
        prewhiten=prewhiten,
        random_state=0,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
    )

    # fit_transform and transform must agree on the same data.
    X_whitened = gpca.fit_transform(Xs_)
    X_whitened2 = gpca.transform(Xs_)
    assert_allclose(X_whitened, X_whitened2, rtol=5e-4)

    if multiview_output:
        assert len(X_whitened) == n_views
        for X_view in X_whitened:
            assert X_view.shape == (n_samples, n_components)
    else:
        assert X_whitened.shape == (n_samples, n_components)
        # Whitening: every component has unit sample standard deviation.
        assert_allclose(
            X_whitened.std(ddof=1, axis=0), np.ones(n_components)
        )

    # Fresh, independent copies for the un-whitened run.
    Xs_ = [view.copy() for view in Xs]
    gpca = GroupPCA(
        n_components=n_components,
        whiten=False,
        prewhiten=prewhiten,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
        random_state=rng,
    ).fit(Xs)
    X_unwhitened = gpca.transform(Xs_)

    if multiview_output:
        assert len(X_unwhitened) == n_views
        for X_view in X_unwhitened:
            assert X_view.shape == (n_samples, n_components)
    else:
        assert X_unwhitened.shape == (n_samples, n_components)
def test_grouppca_deterministic_output():
    """Check that repeated GroupPCA fits give the same first output row.

    Three random views (100 samples; 6, 4 and 5 features) are fitted 20
    times with a new ``GroupPCA`` each time. All estimators share one
    ``RandomState`` instance, so its state may differ from one fit to the
    next. The first row of every ``fit_transform`` result must nonetheless
    match the first run.
    """
    n_samples = 100
    n_repeats = 20
    rng = np.random.RandomState(0)
    Xs = [
        rng.multivariate_normal(np.zeros(p), np.eye(p), size=n_samples)
        for p in (6, 4, 5)
    ]

    first_rows = np.zeros((n_repeats, 2))
    for run in range(n_repeats):
        model = GroupPCA(
            n_components=2,
            n_individual_components=3,
            multiview_output=False,
            random_state=rng,
        )
        # Keep only the first transformed sample of this run.
        first_rows[run, :] = model.fit_transform(Xs)[0]

    # Every run must reproduce the row obtained on the first run.
    expected = np.tile(first_rows[0, :], (n_repeats, 1))
    assert_allclose(first_rows, expected)