Example #1
def test_crossview_params():
    _ = crossviews_plot(
        Xs,
        labels=labels,
        dimensions=[0, 1],
        title="Test",
        cmap="RdBu",
        context=None,
        show=False,
        ax_ticks=False,
        ax_labels=False,
        equal_axes=True,
    )
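# The test above relies on module-level Xs and labels fixtures that are not
# shown in this excerpt. A minimal sketch of what such fixtures could look
# like, defined near the top of the test module (shapes are assumed, not
# taken from mvlearn's actual test suite):
import numpy as np

rng = np.random.RandomState(0)
Xs = [rng.normal(size=(20, 2)), rng.normal(size=(20, 2))]
labels = rng.randint(0, 2, size=20)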
Example #2
algorithm, one is interested in visualizing two views across dimensions.
One use is assessing correlation between corresponding dimensions of views.
Here, we use this function to display the relationship between two views
simulated from transformations of multivariate Gaussians.

"""

# License: MIT

from mvlearn.datasets import make_gaussian_mixture
from mvlearn.plotting import crossviews_plot
import numpy as np

n_samples = 100
centers = [[0, 1], [0, -1]]
covariances = [np.eye(2), np.eye(2)]
Xs, y = make_gaussian_mixture(n_samples,
                              centers,
                              covariances,
                              transform='poly',
                              noise_dims=2)

# Below, we see that the first two dimensions are related by a degree 2
# polynomial while the latter two dimensions are uncorrelated.

crossviews_plot(Xs,
                labels=y,
                title='View 1 vs. View 2 (Polynomial Transform + noise)',
                equal_axes=True)
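
# Not part of the original example: the same comparison can be limited to the
# first two (signal) dimensions with the dimensions argument, leaving the
# uncorrelated noise dimensions out of the grid.
crossviews_plot(Xs,
                labels=y,
                dimensions=[0, 1],
                title='View 1 vs. View 2 (signal dimensions only)',
                equal_axes=True)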
Example #3
n_samples = 100
centers = [[0, 1], [0, -1]]
covariances = [np.eye(2), np.eye(2)]
gm = GaussianMixture(n_samples,
                     centers,
                     covariances,
                     random_state=42,
                     shuffle=True,
                     shuffle_random_state=42)
gm = gm.sample_views(transform='poly', n_noise=2)

latent, y = gm.get_Xy(latents=True)
Xs, _ = gm.get_Xy(latents=False)

# The latent data is plotted against itself to reveal the underlying
# distribution.

crossviews_plot([latent, latent],
                labels=y,
                title='Latent Variable',
                equal_axes=True)

# The noisy latent variable (view 1) is plotted against the transformed latent
# variable (view 2), an example of a dataset with two views.

crossviews_plot(Xs,
                labels=y,
                title='View 1 vs. View 2 (Polynomial Transform + noise)',
                equal_axes=True)
Example #4
# CCA, equivalent to 2 view MCCA, learns transformations of the views,
# projecting a linear combination of the features to a component such that the
# sum of correlations between the ith components of each view is maximized. We
# see the top three components of the first two views plotted against each
# other, pairwise. The strong linear shape on the diagonals shows that the
# found components correlate well.

# the default is no regularization meaning this is SUMCORR-AVGVAR MCCA
cca = CCA(n_components=joint_rank)

# the fit-transform method outputs the scores for each view
cca_scores = cca.fit_transform(Xs[:2])
crossviews_plot(cca_scores,
                title='CCA scores (first two views fitted)',
                equal_axes=True,
                scatter_kwargs={
                    'alpha': 0.4,
                    's': 2.0
                })

# In the 2 view setting, a variety of interpretable statistics can be
# calculated. We assess the canonical correlations achieved and
# their significance using the p-values from a Wilks' Lambda test.

stats = cca.stats(cca_scores)
print(f'Canonical Correlations: {stats["r"]}')
print(f"Wilks' Lambda test p-values: {stats['pF']}")
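
# Not part of the original example: the canonical correlations reported in
# stats["r"] are simply the Pearson correlations between paired score columns
# of the two views, which can be confirmed directly (this assumes numpy is
# imported as np earlier in the full script).
scores_v1, scores_v2 = cca_scores
for i in range(scores_v1.shape[1]):
    r = np.corrcoef(scores_v1[:, i], scores_v2[:, i])[0, 1]
    print(f'Component {i}: correlation between views = {r:.3f}')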

###############################################################################
# Regularized CCA
# ^^^^^^^^^^^^^^^^
print(f'There are {len(Xs)} views.')
print(f'There are {Xs[0].shape[0]} observations')
print(f'The feature sizes are: {[X.shape[1] for X in Xs]}')

###############################################################################
# Embed Views
# ^^^^^^^^^^^

# Create GCCA object and embed the views
gcca = GCCA()
Xs_latents = gcca.fit_transform(Xs)

print(f'The feature sizes are: {[X.shape[1] for X in Xs_latents]}')
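
# Not in the original example: a quick check that the GCCA latents are
# aligned across views, using the correlation of the first latent dimension
# of the first two views (assumes numpy is imported as np in the full script).
r01 = np.corrcoef(Xs_latents[0][:, 0], Xs_latents[1][:, 0])[0, 1]
print(f'Correlation of first latent dimension, view 1 vs view 2: {r01:.3f}')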

###############################################################################
# Plot the first two views against each other
# -------------------------------------------
# The top three dimensions from the latent spaces of the profile correlation
# and pixel average views are plotted against each other. However, their
# latent spaces are influenced by the Karhunen-Loève coefficients, which are
# not plotted.

crossviews_plot(Xs_latents[[0, 2]],
                dimensions=[0, 1, 2],
                labels=y,
                cmap='Set1',
                title='Profile correlations vs Pixel Averages',
                scatter_kwargs={
                    'alpha': 0.4,
                    's': 2.0
                })
Example #6
# genetic types. One can use this to construct a single view
# for subsequent inference, or to examine the loading weights across views.
# Because the genetic expression data has more features than samples, we need
# to use regularization so as not to trivially overfit.

import numpy as np  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402
from mvlearn.plotting import crossviews_plot  # noqa: E402
from mvlearn.embed import CCA  # noqa: E402

cca = CCA(n_components=2, regs=[0.9, 0.1])
Xs_cca = cca.fit_transform(Xs)

y_labels = [diet_names[j] + f' ({genotype_names[i]})' for (i, j) in y]
f, axes = crossviews_plot(Xs_cca,
                          labels=np.asarray(['Red', 'Blue'])[y[:, 0]],
                          ax_ticks=False,
                          figsize=(5, 5),
                          equal_axes=True,
                          title='CCA view embeddings',
                          scatter_kwargs=sca_kwargs,
                          show=False)
corr1, corr2 = cca.canon_corrs(Xs_cca)
axes[0, 0].annotate(f'1st Canonical\nCorrelation = {corr1:.2f}',
                    xy=(0.95, 0.05),
                    xycoords='axes fraction',
                    fontsize=10,
                    ha='right')
axes[1, 1].annotate(f'2nd Canonical\nCorrelation = {corr2:.2f}',
                    xy=(0.95, 0.05),
                    xycoords='axes fraction',
                    fontsize=10,
                    ha='right')
plt.show()
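
# A hedged sketch, not in the original snippet: the intro above also mentions
# examining the loading weights across views. Assuming the fitted CCA exposes
# per-view loading matrices through a loadings_ attribute (as in recent
# mvlearn releases), the most influential input features of the first
# canonical component could be inspected like this:
# for view_idx, W in enumerate(cca.loadings_):
#     top = np.argsort(np.abs(W[:, 0]))[::-1][:5]
#     print(f'View {view_idx}: top 5 features for component 1: {top}')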
Example #7

np.random.seed(1)
Xs = make_data('linear', 250)
Xs_train, Xs_test = train_test_split(Xs, test_size=0.3, random_state=42)

kcca_l = KCCA(n_components=4, reg=0.01)
kcca_l.fit(Xs_train)
linearkcca = kcca_l.transform(Xs_test)

###############################################################################
# Original Data Plotted
# ^^^^^^^^^^^^^^^^^^^^^


crossviews_plot(Xs, ax_ticks=False, ax_labels=True, equal_axes=True)

###############################################################################
# Transformed Test Data Plotted
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


crossviews_plot(linearkcca, ax_ticks=False, ax_labels=True, equal_axes=True)

# Now, we assess the canonical correlations achieved on the testing data, and
# their significance using the p-values from a Wilks' Lambda test.


stats = kcca_l.get_stats()
print(stats['r'])
print(stats['pF'])
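
# Not part of the original snippet: a simple follow-up that flags which
# canonical components are significant at the 5% level based on the Wilks'
# Lambda p-values reported above.
significant = [i for i, p in enumerate(stats['pF']) if p < 0.05]
print(f'Components significant at alpha=0.05: {significant}')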
Example #8
# added independently to both the transformed and untransformed latents.

n_samples = 2000
means = [[0, 1], [0, -1]]
covariances = [np.eye(2), np.eye(2)]
gm = GaussianMixture(n_samples,
                     means,
                     covariances,
                     random_state=42,
                     shuffle=True,
                     shuffle_random_state=42)
latent, y = gm.get_Xy(latents=True)

# Plot latent data against itself to reveal the underlying distribution.
crossviews_plot([latent, latent],
                labels=y,
                title='Latent Variable',
                equal_axes=True)

# Split data into train and test sets
Xs, y = gm.sample_views(transform='poly', n_noise=2).get_Xy()
Xs_train, Xs_test, y_train, y_test = train_test_split(Xs,
                                                      y,
                                                      test_size=0.3,
                                                      random_state=42)

# Plot the testing data after polynomial transformation
crossviews_plot(Xs_test,
                labels=y_test,
                title='Testing Data View 1 vs. View 2 '
                '(Polynomial Transform + noise)',
                equal_axes=True)
Example #9
# Linear Kernel
# -------------
#
# Here we show how KMCCA with a linear kernel can uncover the highly
# correlated latent distribution of the two views, which are linearly
# related, and then transform the data into that latent space.


np.random.seed(1)
Xs = make_data('linear', 250)
Xs_train, Xs_test = train_test_split(Xs, test_size=0.3, random_state=42)

kmcca = KMCCA(n_components=4, regs=0.01)
scores = kmcca.fit(Xs_train).transform(Xs_test)

crossviews_plot(Xs, ax_ticks=False, ax_labels=True, equal_axes=True,
                title='Simulated data crossplot: linear setting')

crossviews_plot(scores, ax_ticks=False, ax_labels=True, equal_axes=True,
                title='Scores crossplot: linear KMCCA')

# Now, we assess the canonical correlations achieved on the testing data

print(f'Test data canonical correlations: {kmcca.canon_corrs(scores)}')

###############################################################################
# Polynomial Kernel
# -----------------

# Here we show how KMCCA with a polynomial kernel can uncover the highly
# correlated latent distribution of the two views, which are polynomially
# related, and then transform the data into that latent space.
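
# The excerpt ends before the polynomial-kernel code. A minimal sketch of how
# it might continue, assuming the same make_data helper accepts a 'poly'
# setting and that KMCCA takes kernel/kernel_params arguments (the degree
# value is purely illustrative):
Xs_poly = make_data('poly', 250)
Xs_train_p, Xs_test_p = train_test_split(Xs_poly, test_size=0.3,
                                         random_state=42)

kmcca_poly = KMCCA(n_components=4, regs=0.01,
                   kernel='poly', kernel_params={'degree': 2})
scores_poly = kmcca_poly.fit(Xs_train_p).transform(Xs_test_p)

crossviews_plot(scores_poly, ax_ticks=False, ax_labels=True, equal_axes=True,
                title='Scores crossplot: polynomial KMCCA')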
Example #10
def test_crossview_default():
    crossviews_plot(Xs)
Example #11
def test_crossview_wrong_dimensions():
    with pytest.raises(ValueError):
        crossviews_plot(Xs, dimensions=[0, 2])
    with pytest.raises(ValueError):
        crossviews_plot(Xs, dimensions=2)