def perform_clustering(seed, m_data, labels, n_clusters):
    """Compare single-view and multiview k-means on the same data.

    Fits ``KMeans`` on view 0, on view 1 and on the horizontally stacked
    views, then fits ``MultiviewKMeans`` on the collection of views. The
    score returned by ``nmi_score`` against ``labels`` is printed for each
    of the four clusterings.

    Parameters
    ----------
    seed
        Passed as ``random_state`` to both estimators.
    m_data
        Collection of views; elements 0 and 1 are clustered individually
        and the whole collection is passed to ``np.hstack`` and to
        ``MultiviewKMeans.fit_predict``.
    labels
        Reference labels that every clustering is scored against.
    n_clusters
        Number of clusters requested from both estimators.

    Returns
    -------
    The cluster assignments produced by ``MultiviewKMeans.fit_predict``.
    """
    # --- Single-view baseline -------------------------------------------
    # The same estimator object is refit for each single-view input.
    baseline = KMeans(n_clusters=n_clusters, random_state=seed, n_init=100)
    view1_pred = baseline.fit_predict(m_data[0])
    view2_pred = baseline.fit_predict(m_data[1])
    stacked_pred = baseline.fit_predict(np.hstack(m_data))

    # Score all three baselines first, then report them.
    view1_nmi, view2_nmi, stacked_nmi = (
        nmi_score(labels, view1_pred),
        nmi_score(labels, view2_pred),
        nmi_score(labels, stacked_pred),
    )
    print('Singleview View 1 NMI Score: {0:.3f}\n'.format(view1_nmi))
    print('Singleview View 2 NMI Score: {0:.3f}\n'.format(view2_nmi))
    print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(stacked_nmi))

    # --- Multiview clustering -------------------------------------------
    multiview = MultiviewKMeans(
        n_clusters=n_clusters, n_init=100, random_state=seed
    )
    multiview_pred = multiview.fit_predict(m_data)
    print('Multiview NMI Score: {0:.3f}\n'.format(
        nmi_score(labels, multiview_pred)))

    return multiview_pred
def test_fit_predict_n_jobs_all(data_random):
    """fit_predict with ``n_jobs=-1`` yields one valid label per test sample."""
    k = data_random['n_clusters']
    model = MultiviewKMeans(n_clusters=k, n_jobs=-1)
    predictions = model.fit_predict(data_random['test_data'])

    # One label per test sample ...
    assert predictions.shape[0] == data_random['n_test']
    # ... and every label lies in [0, n_clusters).
    assert all(0 <= label < k for label in predictions)
def test_fit_predict_n_clusters():
    """Three distinct points per view with three clusters give three labels."""
    expected = 3
    points = [[0, 0], [1, 0], [0, 1]]
    # Both views hold the same three points, as separate arrays.
    views = [np.array(points), np.array(points)]

    model = MultiviewKMeans(n_clusters=expected)
    distinct_labels = set(model.fit_predict(views))

    assert len(distinct_labels) == expected
def test_fit_predict_init_random(data_random):
    """fit_predict with ``init='random'`` yields one valid label per sample.

    ``data_random`` is a fixture mapping that provides ``'n_clusters'``,
    ``'test_data'`` and ``'n_test'``.
    """
    n_clusters = data_random['n_clusters']
    init = 'random'
    # Pass the local through instead of repeating the literal, so the
    # variable is no longer dead code.
    kmeans = MultiviewKMeans(n_clusters=n_clusters, init=init)
    cluster_pred = kmeans.fit_predict(data_random['test_data'])

    # One label per test sample, each within [0, n_clusters).
    assert data_random['n_test'] == cluster_pred.shape[0]
    for cl in cluster_pred:
        assert 0 <= cl < n_clusters
def test_fit_predict_max_iter(data_random):
    """fit_predict with ``max_iter=5`` yields one valid label per test sample."""
    k = data_random['n_clusters']
    model = MultiviewKMeans(n_clusters=k, max_iter=5)
    predictions = model.fit_predict(data_random['test_data'])

    # One label per test sample ...
    assert predictions.shape[0] == data_random['n_test']
    # ... and every label lies in [0, n_clusters).
    assert all(0 <= label < k for label in predictions)
def test_fit_predict_patience(data_random):
    """fit_predict with ``patience=10`` yields one valid label per test sample."""
    k = data_random['n_clusters']
    model = MultiviewKMeans(n_clusters=k, patience=10)
    predictions = model.fit_predict(data_random['test_data'])

    # One label per test sample ...
    assert predictions.shape[0] == data_random['n_test']
    # ... and every label lies in [0, n_clusters).
    assert all(0 <= label < k for label in predictions)
def test_fit_predict_init_predefined():
    """fit_predict accepts explicit per-view centroids passed as ``init``.

    Two views of five 2-D points are clustered into two clusters, starting
    from the centroids ``[0, 0]`` and ``[1, 1]`` in each view. The test
    checks that one label is returned per sample and that each label lies
    in ``[0, n_clusters)``, matching the checks in the sibling tests.
    """
    n_clusters = 2
    v1_centroid = np.array([[0, 0], [1, 1]])
    v2_centroid = np.array([[0, 0], [1, 1]])
    centroids = [v1_centroid, v2_centroid]
    v1_data = np.array([[0, 0], [0.3, 0.2], [0.5, 0.5], [0.7, 0.7], [1, 1]])
    v2_data = np.array([[0, 0], [0.2, 0.4], [0.5, 0.5], [0.4, 0.7], [1, 1]])
    data = [v1_data, v2_data]

    kmeans = MultiviewKMeans(n_clusters=n_clusters, init=centroids)
    cluster_pred = kmeans.fit_predict(data)

    # Previously the result was computed but never checked.
    assert cluster_pred.shape[0] == v1_data.shape[0]
    for cl in cluster_pred:
        assert 0 <= cl < n_clusters
# Stack the views horizontally into one array and cluster that array with the
# single-view estimator.
# NOTE(review): s_kmeans, s_clusters_v1, s_clusters_v2, Xs, labels, n_class
# and RANDOM_SEED are not defined in this part of the script - presumably set
# up earlier; confirm.
s_data = np.hstack(Xs)
s_clusters = s_kmeans.fit_predict(s_data)

# Compute nmi between true class labels and singleview cluster labels
s_nmi_v1 = nmi_score(labels, s_clusters_v1)
s_nmi_v2 = nmi_score(labels, s_clusters_v2)
s_nmi = nmi_score(labels, s_clusters)

# Report each score to three decimal places.
print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

# Multiview kmeans clustering

# Use the MultiviewKMeans instance to cluster the data
m_kmeans = MultiviewKMeans(n_clusters=n_class, random_state=RANDOM_SEED)
m_clusters = m_kmeans.fit_predict(Xs)

# Compute nmi between true class labels and multiview cluster labels
m_nmi = nmi_score(labels, m_clusters)
print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))

###############################################################################
# Comparing predicted cluster labels vs the truth
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# We will display the clustering results of the Multiview kmeans clustering
# algorithm below, along with the true class labels.

# Running TSNE to display clustering results via low dimensional embedding
# (the first view, Xs[0], is embedded here).
tsne = TSNE()
new_data_1 = tsne.fit_transform(Xs[0])