def perform_clustering(seed, m_data, labels, n_clusters):
    """Cluster two-view data with single-view and multiview spherical k-means.

    Three single-view runs are performed with one ``SphericalKMeans``
    estimator (view 1, view 2, and all views stacked column-wise), followed
    by one ``MultiviewSphericalKMeans`` run on the views together. Each
    clustering is scored against ``labels`` with ``nmi_score`` and the score
    is printed.

    Parameters
    ----------
    seed
        Passed as ``random_state`` to both estimators.
    m_data
        Sequence of per-view data; elements 0 and 1 are clustered
        individually and the whole sequence is passed to ``np.hstack``.
        # assumes each view is a 2-D array with matching row counts - confirm
    labels
        Reference labels the predicted clusters are scored against.
    n_clusters
        Number of clusters requested from both estimators.

    Returns
    -------
    The cluster assignments predicted by the multiview estimator.
    """
    # --- Single-view baselines: one estimator, re-fitted per input ---
    single_view_model = SphericalKMeans(
        n_clusters=n_clusters,
        random_state=seed,
        n_init=100,
    )
    view1_pred = single_view_model.fit_predict(m_data[0])
    view2_pred = single_view_model.fit_predict(m_data[1])

    # Stack the views side by side so they can be treated as one view.
    stacked_views = np.hstack(m_data)
    stacked_pred = single_view_model.fit_predict(stacked_views)

    # Score every single-view result first, then report them in order.
    single_view_preds = (view1_pred, view2_pred, stacked_pred)
    single_view_scores = [
        nmi_score(labels, predicted) for predicted in single_view_preds
    ]
    single_view_templates = (
        'Singleview View 1 NMI Score: {0:.3f}\n',
        'Singleview View 2 NMI Score: {0:.3f}\n',
        'Singleview Concatenated NMI Score: {0:.3f}\n',
    )
    for template, score in zip(single_view_templates, single_view_scores):
        print(template.format(score))

    # --- Multiview clustering over all views at once ---
    multiview_model = MultiviewSphericalKMeans(
        n_clusters=n_clusters,
        n_init=100,
        random_state=seed,
    )
    m_clusters = multiview_model.fit_predict(m_data)

    multiview_score = nmi_score(labels, m_clusters)
    print('Multiview NMI Score: {0:.3f}\n'.format(multiview_score))

    return m_clusters
def test_fit_predict_n_jobs_all(data_random):
    """Check ``fit_predict`` output when the estimator is built with ``n_jobs=-1``.

    The prediction must contain one label per test sample, and every label
    must lie in ``[0, n_clusters)``.
    """
    k = data_random['n_clusters']
    model = MultiviewSphericalKMeans(n_clusters=k, n_jobs=-1)
    predicted = model.fit_predict(data_random['test_data'])

    # One label per sample.
    assert predicted.shape[0] == data_random['n_test']

    # Every label is a valid cluster index.
    for label in predicted:
        assert 0 <= label < data_random['n_clusters']
def test_fit_predict_init_random(data_random):
    """Check ``fit_predict`` output when the estimator uses ``init='random'``.

    The prediction must contain one label per test sample, and every label
    must lie in ``[0, n_clusters)``.
    """
    n_clusters = data_random['n_clusters']
    init = 'random'
    # Pass the local instead of repeating the literal so the option under
    # test is defined in exactly one place (mirrors the sibling tests).
    kmeans = MultiviewSphericalKMeans(n_clusters=n_clusters, init=init)
    cluster_pred = kmeans.fit_predict(data_random['test_data'])

    # One label per sample.
    assert data_random['n_test'] == cluster_pred.shape[0]

    # Every label is a valid cluster index.
    for cl in cluster_pred:
        assert 0 <= cl < data_random['n_clusters']
def test_fit_predict_patience(data_random):
    """Check ``fit_predict`` output when the estimator is built with ``patience=10``.

    The prediction must contain one label per test sample, and every label
    must lie in ``[0, n_clusters)``.
    """
    k = data_random['n_clusters']
    patience_setting = 10
    model = MultiviewSphericalKMeans(n_clusters=k, patience=patience_setting)
    predicted = model.fit_predict(data_random['test_data'])

    # One label per sample.
    assert predicted.shape[0] == data_random['n_test']

    # Every label is a valid cluster index.
    for label in predicted:
        assert 0 <= label < data_random['n_clusters']
def test_fit_predict_max_iter(data_random):
    """Check ``fit_predict`` output when the estimator is built with ``max_iter=5``.

    The prediction must contain one label per test sample, and every label
    must lie in ``[0, n_clusters)``.
    """
    k = data_random['n_clusters']
    iteration_cap = 5
    model = MultiviewSphericalKMeans(n_clusters=k, max_iter=iteration_cap)
    predicted = model.fit_predict(data_random['test_data'])

    # One label per sample.
    assert predicted.shape[0] == data_random['n_test']

    # Every label is a valid cluster index.
    for label in predicted:
        assert 0 <= label < data_random['n_clusters']
def test_fit_predict_init_predefined():
    """Check ``fit_predict`` when initial centroids are supplied explicitly.

    Two small two-dimensional views of five samples each are clustered into
    two groups, with ``init`` given as a list holding one centroid array per
    view. The prediction must contain one label per sample, and every label
    must lie in ``[0, n_clusters)``.
    """
    n_clusters = 2

    # One (n_clusters, n_features) centroid array per view.
    v1_centroid = np.array([[-1, -1], [1, 1]])
    v2_centroid = np.array([[-1, -1], [1, 1]])
    centroids = [v1_centroid, v2_centroid]

    v1_data = np.array([[-1, -1], [-2, -2], [0.5, 0.5], [0.7, 0.7], [1, 1]])
    v2_data = np.array([[-1, -1], [-2, -2], [0.5, 0.5], [0.4, 0.7], [1, 1]])
    data = [v1_data, v2_data]

    kmeans = MultiviewSphericalKMeans(n_clusters=n_clusters, init=centroids)
    cluster_pred = kmeans.fit_predict(data)

    # Previously the result was never inspected; apply the same checks the
    # sibling fit_predict tests use so a malformed prediction fails the test.
    assert cluster_pred.shape[0] == v1_data.shape[0]
    for cl in cluster_pred:
        assert 0 <= cl < n_clusters