def test_unregularized_multi():
    """Check that unregularized CCA variants agree on more than two views.

    rCCA, CCA_ALS, GCCA, MCCA and KCCA are fitted on the three views
    ``(X, Y, Z)``; the ``score`` output of each model is then compared with
    rCCA's to one decimal place.

    Raises:
        AssertionError: if any model's scores differ from rCCA's.
    """
    latent_dims = 2
    views = (X, Y, Z)

    cca = rCCA(latent_dims=latent_dims).fit(views)
    # Named ``als`` rather than ``iter`` so the builtin is not shadowed.
    als = CCA_ALS(
        latent_dims=latent_dims,
        stochastic=False,
        tol=1e-12,
        random_state=rng,
    ).fit(views)
    gcca = GCCA(latent_dims=latent_dims).fit(views)
    mcca = MCCA(latent_dims=latent_dims).fit(views)
    kcca = KCCA(latent_dims=latent_dims).fit(views)

    corr_cca = cca.score(views)
    corr_als = als.score(views)
    corr_gcca = gcca.score(views)
    corr_mcca = mcca.score(views)
    corr_kcca = kcca.score(views)

    # np.testing helpers raise AssertionError themselves and return None, so
    # they are called directly: wrapping them in ``assert ... is None`` adds
    # nothing and would drop the whole check when asserts are stripped (-O).
    candidates = (
        ("CCA_ALS", corr_als),
        ("MCCA", corr_mcca),
        ("GCCA", corr_gcca),
        ("KCCA", corr_kcca),
    )
    for name, corr in candidates:
        np.testing.assert_array_almost_equal(
            corr_cca,
            corr,
            decimal=1,
            err_msg=f"{name} scores differ from rCCA",
        )
def test_sparse_input():
    """Check that the models can be fitted on the views ``X_sp`` and ``Y_sp``.

    CCA, CCA_ALS, PLS_ALS, GCCA, MCCA, KCCA and SCCA are fitted with
    ``centre=False`` on ``(X_sp, Y_sp)``. The scores of MCCA, GCCA and KCCA on
    ``(X, Y)`` are then compared with CCA_ALS's to one decimal place.

    Raises:
        AssertionError: if MCCA, GCCA or KCCA scores differ from CCA_ALS's.
    """
    latent_dims = 2
    sparse_views = (X_sp, Y_sp)

    cca = CCA(latent_dims=latent_dims, centre=False).fit(sparse_views)
    # Named ``als`` rather than ``iter`` so the builtin is not shadowed.
    als = CCA_ALS(
        latent_dims=latent_dims,
        tol=1e-9,
        stochastic=False,
        centre=False,
        random_state=rng,
    ).fit(sparse_views)
    # PLS_ALS and SCCA take no part in the comparison below; fitting them only
    # checks that they run on this input without raising.
    PLS_ALS(latent_dims=latent_dims, tol=1e-9, centre=False).fit(sparse_views)
    gcca = GCCA(latent_dims=latent_dims, centre=False).fit(sparse_views)
    mcca = MCCA(latent_dims=latent_dims, centre=False).fit(sparse_views)
    kcca = KCCA(latent_dims=latent_dims, centre=False).fit(sparse_views)
    SCCA(latent_dims=latent_dims, centre=False, c=0.001).fit(sparse_views)

    # NOTE(review): models are fitted on (X_sp, Y_sp) but scored on (X, Y);
    # presumably the former are sparse versions of the latter - confirm.
    dense_views = (X, Y)
    # CCA is scored as a smoke check only; its result is not compared.
    cca.score(dense_views)
    corr_als = als.score(dense_views)
    corr_gcca = gcca.score(dense_views)
    corr_mcca = mcca.score(dense_views)
    corr_kcca = kcca.score(dense_views)

    # np.testing helpers raise on mismatch by themselves; calling them
    # directly keeps the checks active even when asserts are stripped (-O).
    candidates = (
        ("MCCA", corr_mcca),
        ("GCCA", corr_gcca),
        ("KCCA", corr_kcca),
    )
    for name, corr in candidates:
        np.testing.assert_array_almost_equal(
            corr_als,
            corr,
            decimal=1,
            err_msg=f"{name} scores differ from CCA_ALS",
        )
def test_unregularized_methods():
    """Check that unregularized two-view CCA variants agree with each other.

    CCA, CCA_ALS, GCCA, MCCA, KCCA, KGCCA and TCCA are fitted on ``[X, Y]``.
    The scores of CCA_ALS, MCCA, GCCA, KCCA and TCCA are compared with CCA's,
    and KGCCA's with GCCA's, all to one decimal place. Finally, for CCA_ALS,
    CCA, MCCA and KCCA the squared column norms of both transformed views are
    required to be within 20% of ``n``.

    Raises:
        AssertionError: if any comparison fails.
    """
    latent_dims = 2

    cca = CCA(latent_dims=latent_dims).fit([X, Y])
    # Named ``als`` rather than ``iter`` so the builtin is not shadowed.
    als = CCA_ALS(
        latent_dims=latent_dims,
        tol=1e-9,
        stochastic=False,
        random_state=rng,
    ).fit([X, Y])
    gcca = GCCA(latent_dims=latent_dims).fit([X, Y])
    mcca = MCCA(latent_dims=latent_dims, eps=1e-9).fit([X, Y])
    kcca = KCCA(latent_dims=latent_dims).fit([X, Y])
    kgcca = KGCCA(latent_dims=latent_dims).fit([X, Y])
    tcca = TCCA(latent_dims=latent_dims).fit([X, Y])

    views = (X, Y)
    corr_cca = cca.score(views)
    corr_als = als.score(views)
    corr_gcca = gcca.score(views)
    corr_mcca = mcca.score(views)
    corr_kcca = kcca.score(views)
    corr_kgcca = kgcca.score(views)
    corr_tcca = tcca.score(views)

    # np.testing helpers raise on mismatch by themselves; calling them
    # directly keeps the checks active even when asserts are stripped (-O).
    candidates = (
        ("CCA_ALS", corr_als),
        ("MCCA", corr_mcca),
        ("GCCA", corr_gcca),
        ("KCCA", corr_kcca),
        ("TCCA", corr_tcca),
    )
    for name, corr in candidates:
        np.testing.assert_array_almost_equal(
            corr_cca,
            corr,
            decimal=1,
            err_msg=f"{name} scores differ from CCA",
        )
    np.testing.assert_array_almost_equal(
        corr_kgcca,
        corr_gcca,
        decimal=1,
        err_msg="KGCCA scores differ from GCCA",
    )

    # Check standardized models have standard outputs. Each model is
    # transformed once and the result reused for both views.
    # NOTE(review): ``n`` is defined outside this function; presumably the
    # number of samples in X and Y - confirm.
    projections = {
        "CCA_ALS": als.transform(views),
        "CCA": cca.transform(views),
        "MCCA": mcca.transform(views),
        "KCCA": kcca.transform(views),
    }
    for view_index in (0, 1):
        for name, projected in projections.items():
            np.testing.assert_allclose(
                np.linalg.norm(projected[view_index], axis=0) ** 2,
                n,
                rtol=0.2,
                err_msg=f"{name} view {view_index} is not standardized",
            )
def test_cv_fit(self):
    """Smoke-test ``gridsearch_fit`` for GCCA, view-weighted GCCA and MCCA.

    Each model is grid-searched over every pairing of the two candidate
    lists for ``c`` using two folds. Nothing is asserted: the test passes
    when all three searches complete without raising.
    """
    n_components = 5
    first_view_c = [0.1, 0.2]
    second_view_c = [0.1, 0.2]
    grid = {'c': list(itertools.product(first_view_c, second_view_c))}

    # Only the first search requests plotting.
    unweighted_model = GCCA(latent_dims=n_components)
    unweighted_search = unweighted_model.gridsearch_fit(
        self.X,
        self.Y,
        folds=2,
        param_candidates=grid,
        plot=True,
    )

    weighted_model = GCCA(latent_dims=n_components, view_weights=[0.5, 0.5])
    weighted_search = weighted_model.gridsearch_fit(
        self.X,
        self.Y,
        folds=2,
        param_candidates=grid,
    )

    mcca_model = MCCA(latent_dims=n_components)
    mcca_search = mcca_model.gridsearch_fit(
        self.X,
        self.Y,
        folds=2,
        param_candidates=grid,
    )
def test_weighted_GCCA_methods(self):
    """Compare plain GCCA with GCCA using view weights of 0.5 for each view.

    Both models are fitted with ``c = [0, 0]`` and their ``[0, 1]`` training
    correlation entries must agree to one decimal place. A third GCCA is
    fitted with a mask ``K`` but takes no part in the comparison.
    """
    n_components = 5
    reg = 0

    plain = GCCA(latent_dims=n_components, c=[reg, reg]).fit(self.X, self.Y)
    weighted = GCCA(
        latent_dims=n_components,
        c=[reg, reg],
        view_weights=[0.5, 0.5],
    ).fit(self.X, self.Y)

    plain_corr = plain.train_correlations[0, 1]
    weighted_corr = weighted.train_correlations[0, 1]

    # Mask of ones with the first row zeroed from column 200 onwards.
    # NOTE(review): presumably K flags which samples are observed in each
    # view - confirm against GCCA.fit.
    mask = np.ones((2, self.X.shape[0]))
    mask[0, 200:] = 0
    masked = GCCA(latent_dims=n_components, c=[reg, reg]).fit(
        self.X,
        self.Y,
        K=mask,
    )

    comparison = np.testing.assert_array_almost_equal(
        plain_corr,
        weighted_corr,
        decimal=1,
    )
    self.assertIsNone(comparison)
def test_regularized_methods():
    """Check that regularized linear models agree with PLS.

    KCCA (linear kernels), MCCA and rCCA are fitted with ``c = [1, 1]``,
    which the original test describes as maximum regularisation, and their
    scores on ``(X, Y)`` are compared with PLS's to one decimal place. GCCA
    is fitted and scored with the same ``c`` but is not compared.

    Raises:
        AssertionError: if MCCA, KCCA or rCCA scores differ from PLS's.
    """
    latent_dims = 2
    c = 1

    kernel = KCCA(
        latent_dims=latent_dims,
        c=[c, c],
        kernel=["linear", "linear"],
    ).fit((X, Y))
    pls = PLS(latent_dims=latent_dims).fit([X, Y])
    gcca = GCCA(latent_dims=latent_dims, c=[c, c]).fit([X, Y])
    mcca = MCCA(latent_dims=latent_dims, c=[c, c]).fit([X, Y])
    rcca = rCCA(latent_dims=latent_dims, c=[c, c]).fit([X, Y])

    views = (X, Y)
    # GCCA is scored as a smoke check only; its result is not compared.
    gcca.score(views)
    corr_mcca = mcca.score(views)
    corr_kernel = kernel.score(views)
    corr_pls = pls.score(views)
    corr_rcca = rcca.score(views)

    # np.testing helpers raise on mismatch by themselves; calling them
    # directly keeps the checks active even when asserts are stripped (-O).
    candidates = (
        ("MCCA", corr_mcca),
        ("KCCA", corr_kernel),
        ("rCCA", corr_rcca),
    )
    for name, corr in candidates:
        np.testing.assert_array_almost_equal(
            corr_pls,
            corr,
            decimal=1,
            err_msg=f"{name} scores differ from PLS",
        )
def test_weighted_GCCA_methods():
    """Compare plain GCCA with GCCA using view weights of 0.5 for each view.

    Both models are fitted with ``c = [0, 0]`` on ``[X, Y]`` and their scores
    on ``(X, Y)`` must agree to one decimal place. A third GCCA is fitted
    with a mask ``K`` as a smoke check and is not compared.

    Raises:
        AssertionError: if the weighted and unweighted scores differ.
    """
    # TODO we have view weighted GCCA and missing observation GCCA
    latent_dims = 2
    c = 0

    unweighted_gcca = GCCA(latent_dims=latent_dims, c=[c, c]).fit([X, Y])
    deweighted_gcca = GCCA(
        latent_dims=latent_dims,
        c=[c, c],
        view_weights=[0.5, 0.5],
    ).fit([X, Y])

    views = (X, Y)
    corr_unweighted_gcca = unweighted_gcca.score(views)
    corr_deweighted_gcca = deweighted_gcca.score(views)

    # Mask of ones with the first row zeroed from column 200 onwards.
    # NOTE(review): presumably K flags which samples are observed in each
    # view - confirm against GCCA.fit. The fit is only checked for running
    # without raising.
    K = np.ones((2, X.shape[0]))
    K[0, 200:] = 0
    GCCA(latent_dims=latent_dims, c=[c, c]).fit((X, Y), K=K)

    # np.testing helpers raise on mismatch by themselves; calling them
    # directly keeps the check active even when asserts are stripped (-O).
    np.testing.assert_array_almost_equal(
        corr_unweighted_gcca,
        corr_deweighted_gcca,
        decimal=1,
        err_msg="view-weighted GCCA scores differ from unweighted GCCA",
    )
def test_regularized_methods(self):
    """Compare regularized linear models with PLS on the training data.

    KCCA (linear kernels), GCCA, MCCA and rCCA are fitted with ``c = [1, 1]``
    alongside PLS. The ``[0, 1]`` training correlation entries of MCCA, KCCA
    and rCCA must match PLS's to one decimal place.
    """
    n_components = 5
    reg = 1

    kcca_model = KCCA(
        latent_dims=n_components,
        c=[reg, reg],
        kernel=['linear', 'linear'],
    ).fit(self.X, self.Y)
    pls_model = PLS(latent_dims=n_components).fit(self.X, self.Y)
    gcca_model = GCCA(latent_dims=n_components, c=[reg, reg]).fit(self.X, self.Y)
    mcca_model = MCCA(latent_dims=n_components, c=[reg, reg]).fit(self.X, self.Y)
    rcca_model = rCCA(latent_dims=n_components, c=[reg, reg]).fit(self.X, self.Y)

    gcca_corr = gcca_model.train_correlations[0, 1]
    mcca_corr = mcca_model.train_correlations[0, 1]
    kcca_corr = kcca_model.train_correlations[0, 1]
    pls_corr = pls_model.train_correlations[0, 1]
    rcca_corr = rcca_model.train_correlations[0, 1]

    # The PLS-vs-GCCA comparison (decimal=2) is disabled in this test, so
    # the GCCA correlation is read but never asserted on.
    for candidate in (mcca_corr, kcca_corr, rcca_corr):
        outcome = np.testing.assert_array_almost_equal(
            pls_corr,
            candidate,
            decimal=1,
        )
        self.assertIsNone(outcome)
def test_unregularized_methods(self):
    """Compare unregularized CCA variants fitted with one latent dimension.

    CCA, CCA_ALS, GCCA, MCCA and KCCA are fitted on ``self.X`` and
    ``self.Y``. The first entry of ``score_list`` must have shape
    ``(n_samples, latent_dims)`` for every model except CCA, and the
    ``[0, 1]`` training correlation entries must agree to two decimal places.
    """
    n_components = 1

    cca_model = CCA(latent_dims=n_components).fit(self.X, self.Y)
    als_model = CCA_ALS(latent_dims=n_components, tol=1e-9).fit(self.X, self.Y)
    gcca_model = GCCA(latent_dims=n_components).fit(self.X, self.Y)
    mcca_model = MCCA(latent_dims=n_components).fit(self.X, self.Y)
    kcca_model = KCCA(latent_dims=n_components).fit(self.X, self.Y)

    cca_corr = cca_model.train_correlations[0, 1]
    als_corr = als_model.train_correlations[0, 1]
    gcca_corr = gcca_model.train_correlations[0, 1]
    mcca_corr = mcca_model.train_correlations[0, 1]
    kcca_corr = kcca_model.train_correlations[0, 1]

    # Check the score outputs are the right shape
    for model in (als_model, gcca_model, mcca_model, kcca_model):
        expected_shape = (self.X.shape[0], n_components)
        self.assertTrue(model.score_list[0].shape == expected_shape)

    # CCA is checked against CCA_ALS; the other models are then checked
    # against CCA_ALS as well.
    pairs = (
        (cca_corr, als_corr),
        (als_corr, mcca_corr),
        (als_corr, gcca_corr),
        (als_corr, kcca_corr),
    )
    for reference, candidate in pairs:
        outcome = np.testing.assert_array_almost_equal(
            reference,
            candidate,
            decimal=2,
        )
        self.assertIsNone(outcome)
# Generate three views with one latent dimension and score several multiview
# models on them. ``n``, ``p``, ``q`` and ``r`` are defined earlier in the
# script.
latent_dims = 1
cv = 3

(X, Y, Z), (tx, ty, tz) = generate_covariance_data(
    n, view_features=[p, q, r], latent_dims=latent_dims, correlation=[0.9]
)

# %%
# Eigendecomposition-Based Methods
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# %%
mcca = MCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))
# NOTE: every model below is fitted on the same three views it is scored on.
# Fitting on (X, Y, X) instead would learn the third view's weights from X
# and then apply them to Z.

# %%
gcca = GCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

# %%
# We can also use kernel versions of these methods
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# %%
kcca = KCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

# %%
kgcca = KGCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

# %%
# Higher order correlation methods
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
### Linear CCA via alternating least squares (can pass more than 2 views) """ # %% linear_cca = CCA(latent_dims=latent_dims) linear_cca.fit(train_view_1, train_view_2) linear_cca_results = np.stack((linear_cca.train_correlations[0, 1], linear_cca.predict_corr(test_view_1, test_view_2)[0, 1])) """ ### (Regularized) Generalized CCA via alternating least squares (can pass more than 2 views) """ gcca = GCCA(latent_dims=latent_dims, c=[1, 1]) gcca.fit(train_view_1, train_view_2) gcca_results = np.stack( (gcca.train_correlations[0, 1], gcca.predict_corr(test_view_1, test_view_2)[0, 1])) """ ### (Regularized) Multiset CCA via alternating least squares (can pass more than 2 views) """ mcca = MCCA(latent_dims=latent_dims, c=[0.5, 0.5]) # small amount of regularisation added since data is not full rank mcca.fit(train_view_1, train_view_2)