def perform_clustering(seed, m_data, labels, n_clusters):
    """Compare singleview and multiview k-means on two-view data.

    Fits ``KMeans`` on view 0, on view 1, and on all views stacked
    horizontally, then fits ``MultiviewKMeans`` on the views together.
    The NMI score of each clustering against ``labels`` is printed.

    Parameters
    ----------
    seed
        Passed as ``random_state`` to both estimators.
    m_data
        Indexable collection of views; ``m_data[0]`` and ``m_data[1]`` are
        clustered individually and the whole collection is passed to
        ``np.hstack`` and to ``MultiviewKMeans.fit_predict``.
    labels
        True class labels used as the reference for ``nmi_score``.
    n_clusters
        Number of clusters requested from both estimators.

    Returns
    -------
    The cluster labels predicted by ``MultiviewKMeans``.
    """
    # Singleview baselines: one estimator, refit on each input in turn.
    baseline = KMeans(n_clusters=n_clusters, random_state=seed, n_init=100)
    view1_pred = baseline.fit_predict(m_data[0])
    view2_pred = baseline.fit_predict(m_data[1])
    stacked_pred = baseline.fit_predict(np.hstack(m_data))

    # Score all baselines before reporting them, in a fixed order.
    baseline_report = (
        ('Singleview View 1 NMI Score: {0:.3f}\n',
         nmi_score(labels, view1_pred)),
        ('Singleview View 2 NMI Score: {0:.3f}\n',
         nmi_score(labels, view2_pred)),
        ('Singleview Concatenated NMI Score: {0:.3f}\n',
         nmi_score(labels, stacked_pred)),
    )
    for template, score in baseline_report:
        print(template.format(score))

    # Multiview clustering over all views jointly.
    multiview = MultiviewKMeans(n_clusters=n_clusters,
                                n_init=100,
                                random_state=seed)
    multiview_pred = multiview.fit_predict(m_data)
    multiview_score = nmi_score(labels, multiview_pred)
    print('Multiview NMI Score: {0:.3f}\n'.format(multiview_score))

    return multiview_pred
예제 #2
0
def test_fit_predict_n_jobs_all(data_random):
    """Check fit_predict output when the model is built with n_jobs=-1."""
    expected_k = data_random['n_clusters']
    model = MultiviewKMeans(n_clusters=expected_k, n_jobs=-1)
    predicted = model.fit_predict(data_random['test_data'])

    # Prediction length must equal the fixture's 'n_test' entry.
    assert predicted.shape[0] == data_random['n_test']
    # Every label must be a valid cluster index.
    for label in predicted:
        assert 0 <= label < expected_k
예제 #3
0
def test_fit_predict_n_clusters():
    """Check that three samples with n_clusters=3 yield three labels."""
    n_clusters = 3
    # Both views hold the same three 2-D points (as separate arrays).
    points = [[0, 0], [1, 0], [0, 1]]
    views = [np.array(points), np.array(points)]

    model = MultiviewKMeans(n_clusters=n_clusters)
    distinct_labels = set(model.fit_predict(views))

    assert len(distinct_labels) == n_clusters
예제 #4
0
def test_fit_predict_init_random(data_random):
    """Check fit_predict output when the model is built with init='random'.

    The prediction length must equal ``data_random['n_test']`` and every
    predicted label must lie in ``[0, data_random['n_clusters'])``.
    """
    n_clusters = data_random['n_clusters']
    init = 'random'
    # Pass the local through rather than repeating the literal, so the
    # variable is actually used.
    kmeans = MultiviewKMeans(n_clusters=n_clusters, init=init)
    cluster_pred = kmeans.fit_predict(data_random['test_data'])

    assert (data_random['n_test'] == cluster_pred.shape[0])
    for cl in cluster_pred:
        assert (cl >= 0 and cl < data_random['n_clusters'])
예제 #5
0
def test_fit_predict_max_iter(data_random):
    """Check fit_predict output when the model is built with max_iter=5."""
    expected_k = data_random['n_clusters']
    model = MultiviewKMeans(n_clusters=expected_k, max_iter=5)
    predicted = model.fit_predict(data_random['test_data'])

    # Prediction length must equal the fixture's 'n_test' entry.
    assert predicted.shape[0] == data_random['n_test']
    # Every label must be a valid cluster index.
    assert all(0 <= label < expected_k for label in predicted)
예제 #6
0
def test_fit_predict_patience(data_random):
    """Check fit_predict output when the model is built with patience=10."""
    cluster_count = data_random['n_clusters']
    estimator = MultiviewKMeans(n_clusters=cluster_count, patience=10)
    assignments = estimator.fit_predict(data_random['test_data'])

    # Prediction length must equal the fixture's 'n_test' entry.
    assert assignments.shape[0] == data_random['n_test']
    # Every label must be a valid cluster index.
    for assigned in assignments:
        assert 0 <= assigned < cluster_count
예제 #7
0
def test_fit_predict_init_predefined():
    """Check fit_predict when initial centroids are supplied per view.

    Two views of five 2-D samples are clustered with ``n_clusters=2`` and
    an explicit list of per-view centroid arrays passed as ``init``.
    """
    n_clusters = 2
    v1_centroid = np.array([[0, 0], [1, 1]])
    v2_centroid = np.array([[0, 0], [1, 1]])
    centroids = [v1_centroid, v2_centroid]
    v1_data = np.array([[0, 0], [0.3, 0.2], [0.5, 0.5], [0.7, 0.7], [1, 1]])
    v2_data = np.array([[0, 0], [0.2, 0.4], [0.5, 0.5], [0.4, 0.7], [1, 1]])
    data = [v1_data, v2_data]
    kmeans = MultiviewKMeans(n_clusters=n_clusters, init=centroids)
    cluster_pred = kmeans.fit_predict(data)

    # Without assertions this test could only fail by raising; apply the
    # same output checks the other fit_predict tests use.
    assert (v1_data.shape[0] == cluster_pred.shape[0])
    for cl in cluster_pred:
        assert (cl >= 0 and cl < n_clusters)
예제 #8
0
# Stack the views horizontally and cluster the combined matrix, reusing the
# s_kmeans estimator created earlier in the script (outside this excerpt)
s_data = np.hstack(Xs)
s_clusters = s_kmeans.fit_predict(s_data)

# Compute NMI between the true class labels and each singleview clustering
# (s_clusters_v1 and s_clusters_v2 are computed earlier in the script)
s_nmi_v1 = nmi_score(labels, s_clusters_v1)
s_nmi_v2 = nmi_score(labels, s_clusters_v2)
s_nmi = nmi_score(labels, s_clusters)
print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

# Multiview kmeans clustering

# Use a MultiviewKMeans instance to cluster all views jointly
m_kmeans = MultiviewKMeans(n_clusters=n_class, random_state=RANDOM_SEED)
m_clusters = m_kmeans.fit_predict(Xs)

# Compute NMI between the true class labels and the multiview cluster labels
m_nmi = nmi_score(labels, m_clusters)
print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))

###############################################################################
# Comparing predicted cluster labels vs the truth
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# We will display the clustering results of the Multiview kmeans clustering
# algorithm below, along with the true class labels.


# Run TSNE (default settings) to get a low-dimensional embedding of the
# first view for displaying the clustering results
tsne = TSNE()
new_data_1 = tsne.fit_transform(Xs[0])