Ejemplo n.º 1
0
def test_get_stats_icd_check_corrs():
    """Check stats['r'] after fitting with ``decomp='icd'`` on a fixed input.

    Both views are 6x3: scaled (and, for the second view, partly flipped)
    identity matrices stacked on top of each other.
    """
    identity = np.eye(3, 3)
    X = np.vstack((identity, 2 * identity))
    Y = np.vstack((np.fliplr(identity), 0.1 * identity))

    model = KCCA(n_components=3, decomp='icd')
    model.fit([X, Y]).transform([X, Y])
    observed = model.get_stats()

    expected_r = np.array([0.51457091, 0.3656268])
    assert np.allclose(observed['r'], expected_r)
Ejemplo n.º 2
0
def test_get_stats_nonlinear_kernel():
    """Check stats['r'] for the 'poly' kernel, then for the 'gaussian' kernel.

    For each kernel, every entry of ``stats['r']`` must be positive and the
    array must have shape ``(2,)``.
    """
    for kernel in ('poly', 'gaussian'):
        model = KCCA(ktype=kernel)
        model.fit([train1, train2]).transform([train1, train2])
        kernel_stats = model.get_stats()
        assert np.all(kernel_stats['r'] > 0)
        assert kernel_stats['r'].shape == (2,)
Ejemplo n.º 3
0
def test_get_stats_wrong():
    """get_stats() must raise NameError before fit() and after fit() alone.

    The second case calls get_stats() on an estimator that has been fitted
    but (presumably the trigger) never had transform() called on it.
    """
    kcca_bad = KCCA()
    with pytest.raises(NameError):
        kcca_bad.get_stats()

    # Fit outside the raises-block: a NameError coming from fit() itself or
    # from an undefined fixture name must fail the test, not satisfy it.
    kcca_bad.fit([train1, train2])
    with pytest.raises(NameError):
        kcca_bad.get_stats()
Ejemplo n.º 4
0
def test_get_stats_1_feature_vs_matlab():
    """Compare stats for two single-column views against stored MATLAB values.

    The second view is exactly twice the first, so the stored reference has
    ``r == 1`` together with zero/infinite test statistics.
    """
    X = np.arange(1, 11).reshape(-1, 1)
    Y = np.arange(2, 21, 2).reshape(-1, 1)

    model = KCCA(n_components=1)
    model.fit([X, Y]).transform([X, Y])
    observed = model.get_stats()

    # Reference values recorded from MATLAB.
    expected = dict(
        r=np.array([1]),
        Wilks=np.array([0]),
        df1=np.array([1]),
        df2=np.array([8]),
        F=np.array([np.inf]),
        pF=np.array([0]),
        chisq=np.array([np.inf]),
        pChisq=np.array([0]),
    )

    tolerances = dict(rtol=1e-3, atol=1e-4)
    for name in observed:
        assert np.allclose(observed[name], expected[name], **tolerances)
Ejemplo n.º 5
0
def test_get_stats_1_component():
    """Regression check of get_stats() with a single component on seeded data.

    The correlation must be strictly below 1 (beyond a 2*eps margin) and all
    statistics must match previously recorded values.
    """
    np.random.seed(12)
    X = np.random.rand(100, 3)
    Y = np.random.rand(100, 4)

    model = KCCA(n_components=1)
    model.fit_transform([X, Y])
    observed = model.get_stats()

    # The single correlation must not be degenerate (equal or numerically 1).
    margin = 2 * np.finfo(float).eps
    assert not observed['r'] == 1
    assert not observed['r'] + margin >= 1

    # Values recorded from an earlier run of this same computation.
    expected = dict(
        r=np.array([0.22441608326082138]),
        Wilks=np.array([0.94963742]),
        df1=np.array([12]),
        df2=np.array([246.34637455]),
        F=np.array([0.40489714]),
        pF=np.array([0.96096493]),
        chisq=np.array([4.90912773]),
        pChisq=np.array([0.9609454]),
    )

    tolerances = dict(rtol=1e-3, atol=1e-4)
    for name in observed:
        assert np.allclose(observed[name], expected[name], **tolerances)
Ejemplo n.º 6
0
def test_get_stats_2_components():
    """Regression check of get_stats() with two components on seeded data.

    Both correlations must be non-degenerate (below 1 by more than 2*eps) and
    all statistics must match previously recorded values.
    """
    np.random.seed(12)
    X = np.random.rand(100, 3)
    Y = np.random.rand(100, 4)

    model = KCCA(n_components=2)
    model.fit_transform([X, Y])
    observed = model.get_stats()

    # Indices of the correlations that are strictly below 1.
    upper = 1 - 2 * np.finfo(float).eps
    nondegen = np.argwhere(observed['r'] < upper).squeeze()
    assert np.array_equal(nondegen, np.array([0, 1]))

    # Values recorded from an earlier run of this same computation.
    expected = dict(
        r=np.array([0.22441608, 0.19056307]),
        Wilks=np.array([0.91515202, 0.96368572]),
        df1=np.array([12, 6]),
        df2=np.array([246.34637455, 188]),
        F=np.array([0.69962605, 0.58490315]),
        pF=np.array([0.75134965, 0.74212361]),
        chisq=np.array([8.42318331, 4.2115406]),
        pChisq=np.array([0.75124771, 0.64807349]),
    )

    tolerances = dict(rtol=1e-3, atol=1e-4)
    for name in observed:
        assert np.allclose(observed[name], expected[name], **tolerances)
Ejemplo n.º 7
0
def test_get_stats_vs_matlab():
    """Compare three-component stats on a fixed input against MATLAB values.

    The first correlation is expected to be 1 (degenerate); the remaining two
    must be strictly below 1, and every statistic must match the reference.
    """
    identity = np.eye(3, 3)
    X = np.vstack((identity, 2 * identity))
    Y = np.vstack((np.fliplr(identity), 0.1 * identity))

    model = KCCA(n_components=3)
    model.fit([X, Y]).transform([X, Y])
    observed = model.get_stats()

    assert np.allclose(observed['r'][0], 1)
    # Indices of the correlations that are strictly below 1.
    upper = 1 - 2 * np.finfo(float).eps
    nondegen = np.argwhere(observed['r'] < upper).squeeze()
    assert np.array_equal(nondegen, np.array([1, 2]))

    # Reference values recorded from MATLAB.
    expected = dict(
        r=np.array([1.000000000000000, 0.533992991387982, 0.355995327591988]),
        Wilks=np.array([0, 0.624256445446525, 0.873267326732673]),
        df1=np.array([9, 4, 1]),
        df2=np.array([0.150605850666856, 2, 2]),
        F=np.array([np.inf, 0.132832080200501, 0.290249433106576]),
        pF=np.array([0, 0.955941574355455, 0.644004672408012]),
        chisq=np.array([np.inf, 0.706791037156489, 0.542995281660087]),
        pChisq=np.array([0, 0.950488814632803, 0.461194028737338]),
    )

    tolerances = dict(rtol=1e-3, atol=1e-4)
    for name in observed:
        assert np.allclose(observed[name], expected[name], **tolerances)
Ejemplo n.º 8
0
def test_float_degree():
    """Constructing KCCA with a float ``degree`` must raise ValueError."""
    bad_params = dict(ktype="poly", reg=0.001, n_components=1, degree=1.0)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 9
0
def test_neg_degree():
    """Constructing KCCA with a negative ``degree`` must raise ValueError."""
    bad_params = dict(ktype="poly", reg=0.001, n_components=1, degree=-1)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 10
0
def test_int_sigma():
    """Constructing KCCA with an integer ``sigma`` must raise ValueError."""
    bad_params = dict(ktype="gaussian", reg=0.001, n_components=1, sigma=1)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 11
0
def test_neg_reg():
    """Constructing KCCA with a negative ``reg`` must raise ValueError."""
    bad_params = dict(ktype="linear", reg=-0.001, n_components=1)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 12
0
def test_float_nc():
    """Constructing KCCA with a float ``n_components`` must raise ValueError."""
    bad_params = dict(ktype="linear", reg=0.001, n_components=1.0)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 13
0
def test_neg_nc():
    """Constructing KCCA with a negative ``n_components`` must raise ValueError."""
    bad_params = dict(ktype="linear", reg=0.001, n_components=-1)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 14
0
def test_precision():
    """Constructing KCCA with a negative ``precision`` must raise ValueError."""
    bad_params = dict(ktype="poly", decomp="icd", reg=0.001, precision=-1)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 15
0
# Build the two views. Each is 25% of an independent component (indep1 /
# indep2) plus 75% of latent variables stacked and transposed: the first view
# stacks four latent arrays, the second stacks five.
# NOTE(review): indep1, indep2, latvar1, latvar2 and nSamples are defined
# outside this view; presumably the latent variables are 1-D, so each stacked
# array becomes one column -- confirm.
data1 = 0.25 * indep1 + 0.75 * np.vstack(
    (latvar1, latvar2, latvar1, latvar2)).T
data2 = 0.25 * indep2 + 0.75 * np.vstack(
    (latvar1, latvar2, latvar1, latvar2, latvar1)).T

# Split each view by rows: the first int(nSamples / 10) rows (10% of the
# samples) are the training set, the remaining rows are the test set.
train1 = data1[:int(nSamples / 10)]
train2 = data2[:int(nSamples / 10)]
test1 = data1[int(nSamples / 10):]
test2 = data2[int(nSamples / 10):]

# Number of components requested from the shared estimator below.
n_components = 4

# Initialize a linear kCCA estimator shared by the module-level calls below.
kcca_l = KCCA(ktype="linear", reg=0.001, n_components=n_components)

# Call fit_transform, then fit, then transform on the same estimator, keeping
# each return value in its own module-level name. The order matters because
# all three calls act on the one shared kcca_l instance.
kcca_ft = kcca_l.fit_transform([train1, train2])
kcca_f = kcca_l.fit([train1, train2])
kcca_t = kcca_l.transform([train1, train2])


# Test that the length of cancorrs_ is equal to n_components
def test_numCC_cancorrs_():
    """``cancorrs_`` must hold exactly ``n_components`` entries."""
    found = len(kcca_ft.cancorrs_)
    assert found == n_components


# Test that the number of entries in weights_ is equal to the number of views (2)
def test_numCC_ws_():
    """``weights_`` must hold one entry per view (two views are fitted)."""
    n_views = 2
    found = len(kcca_ft.weights_)
    assert found == n_views
Ejemplo n.º 16
0
def test_neg_constant():
    """Constructing KCCA with a negative ``constant`` must raise ValueError."""
    bad_params = dict(ktype="poly", reg=0.001, constant=-1)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 17
0
def test_ktype_gaussian():
    """A gaussian-kernel fit_transform must leave two entries in components_."""
    params = dict(ktype='gaussian', reg=0.0001, n_components=2, sigma=2.0)
    model = KCCA(**params)
    model.fit_transform([train1, train2])
    found = len(model.components_)
    assert found == 2
Ejemplo n.º 18
0
def test_mrank_neg():
    """Constructing KCCA with a negative ``mrank`` must raise ValueError."""
    bad_params = dict(ktype="poly", decomp="icd", reg=0.001, mrank=-1)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 19
0
def test_method():
    """Constructing KCCA with ``method="test"`` must raise ValueError."""
    bad_params = dict(ktype="poly", method="test", reg=0.001, mrank=5)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 20
0
def test_inf_cutoff():
    """Constructing KCCA with ``cutoff=1`` must raise ValueError."""
    bad_params = dict(ktype="poly", reg=0.001, n_components=1, cutoff=1)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 21
0
def test_bad_ktype():
    """Constructing KCCA with ``ktype="test"`` must raise ValueError."""
    bad_params = dict(ktype="test", reg=0.001, n_components=n_components)
    with pytest.raises(ValueError):
        KCCA(**bad_params)
Ejemplo n.º 22
0
def test_no_weights():
    """Calling transform() on an estimator that was never fitted must raise NameError."""
    # Build the estimator and look up the fixtures outside the raises-block:
    # an undefined name (KCCA, train1, train2) also raises NameError and would
    # otherwise make this test pass without exercising transform() at all.
    kcca_b = KCCA(ktype="linear", reg=0.001, n_components=1)
    views = [train1, train2]
    with pytest.raises(NameError):
        kcca_b.transform(views)
Ejemplo n.º 23
0
def test_icd_mrank():
    """fit_transform with ``decomp='icd'`` and ``mrank=2`` returns two items."""
    params = dict(ktype="gaussian", sigma=1.0, n_components=2, reg=0.01,
                  decomp='icd', mrank=2)
    model = KCCA(**params)
    embedded = model.fit_transform([x, y])
    assert len(embedded) == 2
Ejemplo n.º 24
0
def test_ktype_polynomial():
    """A polynomial-kernel fit_transform must leave two entries in components_."""
    params = dict(ktype='poly', reg=0.0001, n_components=2, degree=3)
    model = KCCA(**params)
    model.fit_transform([train1, train2])
    found = len(model.components_)
    assert found == 2
Ejemplo n.º 25
0
# ICD is run on two views of data that each have two dimensions that are
# sinusoidally related. The data has 100 samples and thus the fully decomposed
# kernel matrix would have dimensions (100, 100). Instead we implement ICD with
# a kernel matrix of rank 50 (mrank = 50).

# Seed NumPy's global RNG before generating the data.
# NOTE(review): presumably make_data draws from that RNG, making the example
# reproducible -- confirm against make_data.
np.random.seed(1)
Xsg = make_data('gaussian', 100)

# Plot the raw views against each other before any embedding.
crossviews_plot(Xsg, ax_ticks=False, ax_labels=True, equal_axes=True)

###############################################################################
# Full Decomposition
# ^^^^^^^^^^^^^^^^^^

# Fit a two-component Gaussian-kernel KCCA on the views, then transform the
# same views with the fitted estimator.
kcca_g = KCCA(ktype="gaussian", n_components=2, reg=0.01)
kcca_g.fit(Xsg)
gausskcca = kcca_g.transform(Xsg)

# Plot the transformed views against each other.
crossviews_plot(gausskcca, ax_ticks=False, ax_labels=True, equal_axes=True)

# Pearson correlation between matching columns of the two transformed views:
# column 0 of each view, then column 1 of each view. The second value that
# pearsonr returns is discarded.
# NOTE(review): `stats` is presumably scipy.stats -- the import is outside
# this view.
(gr1, _) = stats.pearsonr(gausskcca[0][:, 0], gausskcca[1][:, 0])
(gr2, _) = stats.pearsonr(gausskcca[0][:, 1], gausskcca[1][:, 1])

print("Below are the canonical correlation of the two components:")
print(gr1, gr2)

###############################################################################
# ICD Decomposition
# ^^^^^^^^^^^^^^^^^