def test_get_stats_2_components():
    np.random.seed(12)
    X = np.random.rand(100, 3)
    Y = np.random.rand(100, 4)
    past_stats = {
        'r': np.array([0.22441608, 0.19056307]),
        'Wilks': np.array([0.91515202, 0.96368572]),
        'df1': np.array([12, 6]),
        'df2': np.array([246.34637455, 188]),
        'F': np.array([0.69962605, 0.58490315]),
        'pF': np.array([0.75134965, 0.74212361]),
        'chisq': np.array([8.42318331, 4.2115406]),
        'pChisq': np.array([0.75124771, 0.64807349]),
    }
    kcca2 = KCCA(n_components=2)
    kcca2.fit_transform([X, Y])
    stats = kcca2.get_stats()

    # Both canonical correlations should be nondegenerate (strictly below 1)
    nondegen = np.argwhere(stats['r'] < 1 - 2 * np.finfo(float).eps).squeeze()
    assert np.array_equal(nondegen, np.array([0, 1]))

    for key in stats:
        assert np.allclose(stats[key], past_stats[key], rtol=1e-3, atol=1e-4)
def test_get_stats_1_component():
    np.random.seed(12)
    X = np.random.rand(100, 3)
    Y = np.random.rand(100, 4)
    past_stats = {
        'r': np.array([0.22441608326082138]),
        'Wilks': np.array([0.94963742]),
        'df1': np.array([12]),
        'df2': np.array([246.34637455]),
        'F': np.array([0.40489714]),
        'pF': np.array([0.96096493]),
        'chisq': np.array([4.90912773]),
        'pChisq': np.array([0.9609454]),
    }
    kcca1 = KCCA(n_components=1)
    kcca1.fit_transform([X, Y])
    stats = kcca1.get_stats()

    # The single canonical correlation must not be degenerate (i.e. not 1)
    assert not stats['r'] == 1
    assert not stats['r'] + 2 * np.finfo(float).eps >= 1

    for key in stats:
        assert np.allclose(stats[key], past_stats[key], rtol=1e-3, atol=1e-4)
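# The hard-coded 'Wilks', 'chisq', 'df1', and 'pChisq' expectations above can
# be audited by hand. Below is a minimal sketch of the classical
# Bartlett-Lawley formulas (the same conventions MATLAB's canoncorr uses);
# it reproduces the expected arrays above to ~1e-4, though it is an
# illustrative reconstruction, not necessarily what get_stats() computes
# internally. The remaining keys ('F', 'df2', 'pF') follow Rao's F
# approximation, which is omitted here for brevity.

import numpy as np
from scipy.stats import chi2


def bartlett_lawley(r, n, p, q):
    """Wilks' lambda and Bartlett's chi-square (with Lawley's modification).

    r: canonical correlations, n: samples, p/q: dimensions of the two views.
    Component k tests H0_k: correlations k..d-1 are all zero.
    """
    d = len(r)
    k = np.arange(d)
    # Wilks' lambda: product of (1 - r_i^2) over the remaining correlations
    wilks = np.array([np.prod(1 - r[i:] ** 2) for i in k])
    # Lawley's correction: sum of 1/r_i^2 over the i < k already removed
    lawley = np.concatenate(([0], np.cumsum(1 / r[:-1] ** 2)))
    chisq = -(n - k - (p + q + 3) / 2 + lawley) * np.log(wilks)
    df1 = (p - k) * (q - k)
    return wilks, chisq, df1, chi2.sf(chisq, df1)


# e.g. bartlett_lawley(np.array([0.22441608, 0.19056307]), n=100, p=3, q=4)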
def test_ktype_polynomial():
    kpoly = KCCA(ktype='poly', reg=0.0001, n_components=2, degree=3)
    kpoly.fit_transform([train1, train2])
    assert len(kpoly.components_) == 2
def test_ktype_gaussian():
    kgauss = KCCA(ktype='gaussian', reg=0.0001, n_components=2, sigma=2.0)
    kgauss.fit_transform([train1, train2])
    assert len(kgauss.components_) == 2
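# For reference, the two non-linear kernels exercised above are conventionally
# defined as below. This is a minimal numpy sketch of the textbook formulas;
# the constant and normalization conventions may differ from the KCCA
# internals, so treat it as illustrative only.

import numpy as np
from scipy.spatial.distance import cdist


def poly_kernel(X, Y, degree=3, c=1.0):
    # Polynomial kernel: k(x, y) = (<x, y> + c)^degree
    return (X @ Y.T + c) ** degree


def gaussian_kernel(X, Y, sigma=2.0):
    # Gaussian (RBF) kernel: k(x, y) = exp(-||x - y||^2 / (2 sigma^2))
    return np.exp(-cdist(X, Y, 'sqeuclidean') / (2 * sigma ** 2))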
data2 = 0.25 * indep2 + 0.75 * np.vstack(
    (latvar1, latvar2, latvar1, latvar2, latvar1)).T

# Split each dataset into a training set and test set
# (10% of the dataset is training data)
train1 = data1[:int(nSamples / 10)]
train2 = data2[:int(nSamples / 10)]
test1 = data1[int(nSamples / 10):]
test2 = data2[int(nSamples / 10):]

n_components = 4

# Initialize a linear kCCA class
kcca_l = KCCA(ktype="linear", reg=0.001, n_components=n_components)

# Use the methods to find a kCCA mapping and transform the views of data
kcca_ft = kcca_l.fit_transform([train1, train2])
kcca_f = kcca_l.fit([train1, train2])
kcca_t = kcca_l.transform([train1, train2])


# Test that the number of canonical correlations (cancorrs_) equals
# n_components
def test_numCC_cancorrs_():
    assert len(kcca_ft.cancorrs_) == n_components


# Test that the number of weights (ws_) equals the number of views
def test_numCC_ws_():
    assert len(kcca_ft.weights_) == 2


# Test that the number of comps_ equals the number of views
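# A fitted mapping can also be applied to the held-out views to check that
# the learned correlations generalize beyond the training split. A minimal,
# illustrative sketch; the helper name below is hypothetical, not part of
# the test suite:

from scipy.stats import pearsonr


def heldout_correlations(kcca, views):
    """Pearson correlation between paired projected components of two views."""
    projections = kcca.transform(views)
    return [pearsonr(projections[0][:, i], projections[1][:, i])[0]
            for i in range(projections[0].shape[1])]


# e.g. heldout_correlations(kcca_l, [test1, test2])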
def test_icd_mrank():
    kcca_g_icd = KCCA(ktype="gaussian", sigma=1.0, n_components=2,
                      reg=0.01, decomp='icd', mrank=2)
    icd = kcca_g_icd.fit_transform([x, y])
    assert len(icd) == 2
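# The decomp='icd' option approximates the full kernel matrix with an
# incomplete Cholesky decomposition truncated at rank mrank. A minimal numpy
# sketch of the standard pivoted algorithm, shown for intuition; the KCCA
# internals may differ in pivoting rules and stopping tolerance:

import numpy as np


def incomplete_cholesky(K, mrank, tol=1e-8):
    """Return G of shape (n, <=mrank) such that G @ G.T approximates K."""
    n = K.shape[0]
    G = np.zeros((n, mrank))
    d = np.diag(K).astype(float).copy()  # residual diagonal of K
    for j in range(mrank):
        i = int(np.argmax(d))            # greedy pivot: largest residual
        if d[i] < tol:                   # kernel is effectively rank j
            return G[:, :j]
        G[:, j] = (K[:, i] - G @ G[i, :]) / np.sqrt(d[i])
        d -= G[:, j] ** 2
    return G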
(gr2, _) = stats.pearsonr(gausskcca[0][:, 1], gausskcca[1][:, 1])

print("Below are the canonical correlations of the two components:")
print(gr1, gr2)

###############################################################################
# ICD Decomposition
# ^^^^^^^^^^^^^^^^^

kcca_g_icd = KCCA(ktype="gaussian", sigma=1.0, n_components=2,
                  reg=0.01, decomp='icd', mrank=50)
icd_g = kcca_g_icd.fit_transform(Xsg)

crossviews_plot(icd_g, ax_ticks=False, ax_labels=True, equal_axes=True)

(icdr1, _) = stats.pearsonr(icd_g[0][:, 0], icd_g[1][:, 0])
(icdr2, _) = stats.pearsonr(icd_g[0][:, 1], icd_g[1][:, 1])

print("Below are the canonical correlations of the two components:")
print(icdr1, icdr2)

# The canonical correlations of the full decomposition vs. ICD (mrank=50)
# are very similar!

###############################################################################
# ICD Kernel Rank vs. Canonical Correlation
# -----------------------------------------
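# A minimal sketch of the comparison the heading announces: sweep the ICD
# truncation rank (mrank) and record the first canonical correlation at each
# rank. The rank grid below is an arbitrary choice for illustration:

mranks = [2, 5, 10, 20, 50, 100]
first_corrs = []
for rank in mranks:
    kcca_rank = KCCA(ktype="gaussian", sigma=1.0, n_components=2,
                     reg=0.01, decomp='icd', mrank=rank)
    proj = kcca_rank.fit_transform(Xsg)
    first_corrs.append(stats.pearsonr(proj[0][:, 0], proj[1][:, 0])[0])

print(dict(zip(mranks, first_corrs)))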