Esempio n. 1
0
def test_n_clusters_not_positive_int(small_data):
    """Check that ``n_clusters`` values of -1 and 0 raise ``ValueError``."""
    for invalid_n_clusters in (-1, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(n_clusters=invalid_n_clusters)
            model.fit_predict(small_data)
Esempio n. 2
0
def test_info_view_not_valid(small_data):
    """Check that ``info_view`` values of -1 and 6 raise ``ValueError``."""
    for invalid_info_view in (-1, 6):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(n_clusters=2,
                                                info_view=invalid_info_view)
            model.fit_predict(small_data)
Esempio n. 3
0
def test_not_valid_affinity(small_data):
    """Check that ``affinity`` values of 'What' and None raise ``ValueError``."""
    for invalid_affinity in ('What', None):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(affinity=invalid_affinity)
            model.fit_predict(small_data)
Esempio n. 4
0
def test_gamma_not_positive_float(small_data):
    """Check that ``gamma`` values of -1.5 and 0 raise ``ValueError``."""
    for invalid_gamma in (-1.5, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(gamma=invalid_gamma)
            model.fit_predict(small_data)
Esempio n. 5
0
def test_n_neighbors_not_positive_int(small_data):
    """Check that ``n_neighbors`` values of -1 and 0 raise ``ValueError``.

    The estimator is built with ``affinity='nearest_neighbors'`` in both
    cases.
    """
    for invalid_n_neighbors in (-1, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(
                affinity='nearest_neighbors',
                n_neighbors=invalid_n_neighbors)
            model.fit_predict(small_data)
Esempio n. 6
0
def test_fit_predict_default(data):
    """Fit two clusters on the first two views and validate the labels.

    Asserts that one label is produced per fitted sample and that every
    label lies in ``[0, data['n_clusters'])``.
    """
    first_two_views = data['fit_data'][:2]
    model = MultiviewSpectralClustering(2, random_state=RANDOM_STATE)
    predictions = model.fit_predict(first_two_views)
    upper_bound = data['n_clusters']

    assert predictions.shape[0] == data['n_fit']
    for label in predictions:
        assert 0 <= label < upper_bound
Esempio n. 7
0
def test_fit_predict_info_view(data):
    """Fit with a randomly chosen ``info_view`` and validate the labels.

    The view index is drawn with ``np.random.randint`` from the range of
    available views. Asserts that one label is produced per fitted sample
    and that every label lies in ``[0, data['n_clusters'])``.
    """
    views = data['fit_data']
    chosen_view = np.random.randint(len(views))
    upper_bound = data['n_clusters']
    model = MultiviewSpectralClustering(upper_bound,
                                        random_state=RANDOM_STATE,
                                        info_view=chosen_view)
    predictions = model.fit_predict(views)

    assert predictions.shape[0] == data['n_fit']
    for label in predictions:
        assert 0 <= label < upper_bound
Esempio n. 8
0
def test_fit_predict_max_iter(data):
    """Fit with ``max_iter=5`` and validate the labels.

    Asserts that one label is produced per fitted sample and that every
    label lies in ``[0, data['n_clusters'])``.
    """
    views = data['fit_data']
    upper_bound = data['n_clusters']
    model = MultiviewSpectralClustering(upper_bound,
                                        random_state=RANDOM_STATE,
                                        max_iter=5)
    predictions = model.fit_predict(views)

    assert predictions.shape[0] == data['n_fit']
    for label in predictions:
        assert 0 <= label < upper_bound
def perform_clustering(seed, m_data, labels, n_clusters, kernel='rbf'):
    """Compare single-view and multi-view spectral clustering by NMI.

    Runs ``SpectralClustering`` on the first view, on the second view and on
    the horizontal concatenation of all views, then runs
    ``MultiviewSpectralClustering`` on the list of views. The NMI score of
    each clustering against ``labels`` is printed.

    Parameters
    ----------
    seed
        Value passed as ``random_state`` to both estimators, so the caller
        controls reproducibility.
    m_data : list of array-like
        The views. ``m_data[0]`` and ``m_data[1]`` are clustered separately,
        and all views are stacked with ``np.hstack``.
    labels : array-like
        True class labels that the cluster assignments are scored against.
    n_clusters : int
        Number of clusters requested from both estimators.
    kernel : str, default='rbf'
        Value passed as ``affinity`` to both estimators.

    Returns
    -------
    m_clusters
        The result of ``MultiviewSpectralClustering.fit_predict(m_data)``.
    """
    # Single-view spectral clustering
    # The same estimator is refit for each view and for the concatenation.
    s_spectral = SpectralClustering(n_clusters=n_clusters,
                                    random_state=seed,
                                    affinity=kernel,
                                    n_init=100)
    s_clusters_v1 = s_spectral.fit_predict(m_data[0])
    s_clusters_v2 = s_spectral.fit_predict(m_data[1])

    # Concatenate the multiple views into a single view
    s_data = np.hstack(m_data)
    s_clusters = s_spectral.fit_predict(s_data)

    # Compute nmi between true class labels and single-view cluster labels
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)
    print('Single-view View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Single-view View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multi-view spectral clustering
    # The views are passed as a list, not concatenated.
    m_spectral = MultiviewSpectralClustering(n_clusters=n_clusters,
                                             random_state=seed,
                                             affinity=kernel,
                                             n_init=100)
    m_clusters = m_spectral.fit_predict(m_data)

    # Compute nmi between true class labels and multi-view cluster labels
    m_nmi = nmi_score(labels, m_clusters)
    print('Multi-view NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
Esempio n. 10
0
# Score the single-view clustering stored in s_clusters against the true
# labels. s_nmi_v1 and s_nmi_v2 are not defined in this excerpt (presumably
# computed earlier in the script).
s_nmi = nmi_score(labels, s_clusters)
print('Single-view View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
print('Single-view View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

###############################################################################
# Multiview spectral clustering
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Use a MultiviewSpectralClustering instance with a nearest-neighbors
# affinity to cluster the multi-view data.
m_spectral = MultiviewSpectralClustering(n_clusters=n_class,
                                         affinity='nearest_neighbors',
                                         max_iter=12,
                                         random_state=RANDOM_SEED,
                                         n_init=10)
m_clusters = m_spectral.fit_predict(m_data)

# Compute NMI between the true class labels and the multi-view cluster labels
m_nmi = nmi_score(labels, m_clusters)
print('Multi-view NMI Score: {0:.3f}\n'.format(m_nmi))

###############################################################################
# Plots of clusters produced by multi-view spectral clustering and the true
# clusters
#
# We will display the clustering results of the multi-view spectral clustering
# algorithm below, along with the true class labels.

# display_plots is not defined in this excerpt; it is called once with the
# true labels and once with the predicted multi-view cluster labels.
display_plots('Ground Truth', m_data, labels)
display_plots('Multi-view Clustering', m_data, m_clusters)
    sca_kwargs = {'alpha': 0.7, 's': 10}
    f, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes[0].scatter(Xs[:, 0], Xs[:, 1], c=y_true, **sca_kwargs)
    axes[0].set_title('True labels', fontsize=14)
    axes[1].scatter(Xs[:, 0], Xs[:, 1], c=y_predicted, **sca_kwargs)
    axes[1].set_title(title, fontsize=14)
    axes[1].annotate(f'Homogeneity\nscore = {score:.2f}',
                     xy=(0.95, 0.85),
                     xycoords='axes fraction',
                     fontsize=13,
                     ha='right')
    axes[0].set_ylabel(f'{method} Component 2')
    plt.setp(axes, xticks=[], yticks=[], xlabel=f'{method} Component 1')
    plt.tight_layout()
    plt.show()


# Single-view baseline: horizontally stack all views in Xs into one matrix
# and cluster it into 4 groups with SpectralClustering.
sv_clust = SpectralClustering(n_clusters=4, affinity='nearest_neighbors')
sv_labels = sv_clust.fit_predict(np.hstack(Xs))

# Plot the true labels y next to the single-view labels on Xs_pca.
plot_clusters(Xs_pca, y, sv_labels, 'Concatenated clustering labels', 'PCA')

# Multiview clustering: pass the views in Xs directly (no concatenation),
# with the same number of clusters and affinity as the baseline.
mv_clust = MultiviewSpectralClustering(n_clusters=4,
                                       affinity='nearest_neighbors')
mv_labels = mv_clust.fit_predict(Xs)

# Plot the true labels y next to the multiview labels on Xs_mvmds.
plot_clusters(Xs_mvmds, y, mv_labels, 'Multiview clustering labels', 'MVMDS')
# Now, assuming we are trying to group the samples into 4 clusters (as was
# much more obvious after using *mvlearn*'s dimensionality reduction viewing
# method), we compare multiview clustering techniques to singleview
# counterparts. Specifically, we compare 6-view spectral clustering in
# *mvlearn* with single view spectral clustering from *scikit-learn*. For
# multiview clustering, we use all 6 full views of data (not the
# dimensionality-reduced data).
# For singleview comparison, we concatenate these 6 full views into a single
# large matrix, the same as what we did before for PCA.
#
# Since we have the true class labels, we assess the clustering accuracy with
# a homogeneity score.

# Cluster the views in Xs into 4 groups with multiview spectral clustering,
# using a nearest-neighbors affinity.
mv_clust = MultiviewSpectralClustering(n_clusters=4,
                                       affinity='nearest_neighbors')
mvlearn_cluster_labels = mv_clust.fit_predict(Xs)

# Score the clustering against the true labels y with the homogeneity score
mv_score = homogeneity_score(y, mvlearn_cluster_labels)
print('Multiview homogeneity score: {0:.3f}'.format(mv_score))

# Rearrange the predicted labels with rearrange_labels (not defined in this
# excerpt) for easier visual comparison with the plot of the true labels.
# The score above is computed before this relabeling.
mvlearn_cluster_labels = rearrange_labels(y, mvlearn_cluster_labels)

# Visualize the clusters in the 2-dimensional space
quick_visualize(Xs,
                labels=mvlearn_cluster_labels,
                title="Predicted Clusters",
                ax_ticks=False,
                ax_labels=False,
Esempio n. 13
0
def test_samples_not_2D_2(small_data):
    """Check that fitting on two 1-D views of length 10 raises ``ValueError``."""
    with pytest.raises(ValueError):
        # Each view is a flat array of shape (10,) rather than a 2-D matrix.
        flat_views = [np.random.random((10, )) for _ in range(2)]
        model = MultiviewSpectralClustering(random_state=RANDOM_STATE)
        model.fit_predict(flat_views)
Esempio n. 14
0
def test_n_views_too_small2(small_data):
    """Check that fitting on an empty list of views raises ``ValueError``."""
    no_views = []
    with pytest.raises(ValueError):
        model = MultiviewSpectralClustering(random_state=RANDOM_STATE)
        model.fit_predict(no_views)
Esempio n. 15
0
def test_random_state_not_convertible(small_data):
    """Check that ``random_state='ab'`` raises ``ValueError``."""
    invalid_random_state = 'ab'
    with pytest.raises(ValueError):
        model = MultiviewSpectralClustering(n_clusters=5,
                                            random_state=invalid_random_state)
        model.fit_predict(small_data)