def test_not_2_views():
    """Fitting on three views must raise ``ValueError``.

    The views are valid 2-D arrays with the same number of samples, so the
    number of views is the only invalid aspect of the input. With 1-D views
    the error could be raised for the wrong reason.
    """
    views = [np.random.random((10, n_feats)) for n_feats in (11, 10, 9)]
    with pytest.raises(ValueError):
        kmeans = MultiviewKMeans()
        kmeans.fit(views)
def perform_clustering(seed, m_data, labels, n_clusters):
    """Compare single-view and multiview k-means and print their NMI scores.

    Parameters
    ----------
    seed
        Value passed as ``random_state`` to both estimators.
    m_data
        Sequence of views; the first two are clustered individually, and
        all of them are stacked with ``np.hstack`` for the concatenated run.
    labels
        True class labels that the cluster assignments are scored against.
    n_clusters
        Number of clusters requested from both estimators.

    Returns
    -------
    The cluster labels produced by ``MultiviewKMeans.fit_predict``.
    """
    # Single-view baseline: the same estimator is refit on each input.
    single = KMeans(n_clusters=n_clusters, random_state=seed, n_init=100)
    view1_pred = single.fit_predict(m_data[0])
    view2_pred = single.fit_predict(m_data[1])
    stacked_pred = single.fit_predict(np.hstack(m_data))

    # Score each single-view clustering against the true labels.
    view1_nmi = nmi_score(labels, view1_pred)
    view2_nmi = nmi_score(labels, view2_pred)
    stacked_nmi = nmi_score(labels, stacked_pred)

    print('Singleview View 1 NMI Score: {0:.3f}\n'.format(view1_nmi))
    print('Singleview View 2 NMI Score: {0:.3f}\n'.format(view2_nmi))
    print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(stacked_nmi))

    # Multiview clustering on the untouched list of views.
    multi = MultiviewKMeans(
        n_clusters=n_clusters, n_init=100, random_state=seed
    )
    multi_pred = multi.fit_predict(m_data)
    multi_nmi = nmi_score(labels, multi_pred)
    print('Multiview NMI Score: {0:.3f}\n'.format(multi_nmi))

    return multi_pred
def test_predict_no_centroids1():
    """``predict`` raises ``AttributeError`` if centroids are ``None``."""
    first_view = np.random.random((10, 11))
    second_view = np.random.random((10, 10))
    with pytest.raises(AttributeError):
        model = MultiviewKMeans()
        # Set the fitted attribute by hand, with no usable centroids.
        model.centroids_ = [None, None]
        model.predict([first_view, second_view])
def test_init_not_2_views(data_small):
    """A three-view ``init`` must be rejected with ``ValueError``.

    All three centroid arrays are passed as ``init``. Previously the third
    array was created but left out, so the three-view case was never
    exercised.
    """
    init_views = [
        np.random.random((2, 8)),
        np.random.random((2, 9)),
        np.random.random((2, 9)),
    ]
    with pytest.raises(ValueError):
        kmeans = MultiviewKMeans(init=init_views)
        kmeans.fit(data_small)
def test_fit_predict_n_jobs_all(data_random):
    """``fit_predict`` with ``n_jobs=-1`` gives one valid label per sample."""
    k = data_random['n_clusters']
    model = MultiviewKMeans(n_clusters=k, n_jobs=-1)
    predicted = model.fit_predict(data_random['test_data'])

    assert predicted.shape[0] == data_random['n_test']
    for label in predicted:
        assert 0 <= label < data_random['n_clusters']
def test_final_centroids_no_consensus():
    """``_final_centroids`` raises ``ConvergenceWarning`` on this input.

    Both views share the same centroids, but the rows of the second view
    are the rows of the first view in swapped order.
    """
    views = [np.array([[0, 1], [1, 0]]), np.array([[1, 0], [0, 1]])]
    start_centroids = [
        np.array([[0, 1], [1, 0]]),
        np.array([[0, 1], [1, 0]]),
    ]
    with pytest.raises(ConvergenceWarning):
        model = MultiviewKMeans(random_state=RANDOM_SEED)
        model._final_centroids(views, start_centroids)
def test_final_centroids_less_than_n_clusters():
    """``_final_centroids`` raises ``ConvergenceWarning`` on this input.

    Three clusters are requested while each view has only two samples.
    """
    with pytest.raises(ConvergenceWarning):
        model = MultiviewKMeans(n_clusters=3, random_state=RANDOM_SEED)
        # Draw order is kept: view 1, view 2, then the two centroid arrays.
        views = [np.random.random((2, 5)), np.random.random((2, 6))]
        start_centroids = [
            np.random.random((3, 5)),
            np.random.random((3, 6)),
        ]
        model._final_centroids(views, start_centroids)
def test_fit_predict_max_iter(data_random):
    """``fit_predict`` with ``max_iter=5`` gives one valid label per sample."""
    k = data_random['n_clusters']
    model = MultiviewKMeans(n_clusters=k, max_iter=5)
    predicted = model.fit_predict(data_random['test_data'])

    assert predicted.shape[0] == data_random['n_test']
    for label in predicted:
        assert 0 <= label < data_random['n_clusters']
def test_fit_predict_patience(data_random):
    """``fit_predict`` with ``patience=10`` gives one valid label per sample."""
    k = data_random['n_clusters']
    model = MultiviewKMeans(n_clusters=k, patience=10)
    predicted = model.fit_predict(data_random['test_data'])

    assert predicted.shape[0] == data_random['n_test']
    for label in predicted:
        assert 0 <= label < data_random['n_clusters']
def test_fit_predict_init_random(data_random):
    """``fit_predict`` with ``init='random'`` gives one valid label per sample.

    The ``init`` local is now passed to the estimator. Before, it was
    assigned but never used and the literal was repeated in the call.
    """
    n_clusters = data_random['n_clusters']
    init = 'random'
    kmeans = MultiviewKMeans(n_clusters=n_clusters, init=init)
    cluster_pred = kmeans.fit_predict(data_random['test_data'])

    assert data_random['n_test'] == cluster_pred.shape[0]
    for cl in cluster_pred:
        assert 0 <= cl < n_clusters
def test_fit_predict_n_clusters():
    """Three distinct points with ``n_clusters=3`` give three labels."""
    n_clusters = 3
    points = [[0, 0], [1, 0], [0, 1]]
    # Both views hold the same three points, as separate arrays.
    data = [np.array(points), np.array(points)]

    model = MultiviewKMeans(n_clusters=n_clusters)
    distinct_labels = set(model.fit_predict(data))

    assert len(distinct_labels) == n_clusters
def test_fit_predict_init_predefined():
    """``fit_predict`` accepts user-supplied centroids as ``init``.

    The test checks that one label is returned per sample and that every
    label is a valid cluster index. Before, it made no assertions and only
    checked that the call did not raise.
    """
    n_clusters = 2
    v1_centroid = np.array([[0, 0], [1, 1]])
    v2_centroid = np.array([[0, 0], [1, 1]])
    centroids = [v1_centroid, v2_centroid]
    v1_data = np.array([[0, 0], [0.3, 0.2], [0.5, 0.5], [0.7, 0.7], [1, 1]])
    v2_data = np.array([[0, 0], [0.2, 0.4], [0.5, 0.5], [0.4, 0.7], [1, 1]])
    data = [v1_data, v2_data]

    kmeans = MultiviewKMeans(n_clusters=n_clusters, init=centroids)
    cluster_pred = kmeans.fit_predict(data)

    # Same shape and label-range checks as the other fit_predict tests.
    assert cluster_pred.shape[0] == v1_data.shape[0]
    for cl in cluster_pred:
        assert 0 <= cl < n_clusters
def test_predict_random_small(data_random):
    """Fit on the first two samples of each view, then predict test data."""
    model = MultiviewKMeans()
    fit_views = data_random['fit_data']
    tiny_fit_set = [fit_views[0][:2], fit_views[1][:2]]
    model.fit(tiny_fit_set)

    predicted = model.predict(data_random['test_data'])

    assert predicted.shape[0] == data_random['n_test']
    for label in predicted:
        assert 0 <= label < data_random['n_clusters']
def test_n_init_not_positive_int(data_small):
    """Non-positive ``n_init`` values must raise ``ValueError`` on fit.

    The ``data_small`` fixture is now requested as a parameter, as in the
    sibling validation tests. Without it the body passed the module-level
    name ``data_small`` to ``fit`` instead of fixture data, so the test
    could pass for the wrong reason.
    """
    for bad_n_init in (-1, 0):
        with pytest.raises(ValueError):
            kmeans = MultiviewKMeans(n_init=bad_n_init)
            kmeans.fit(data_small)
def test_max_iter_not_positive_int(data_small):
    """``max_iter`` values of -1 and 0 must each raise ``ValueError``."""
    for bad_max_iter in (-1, 0):
        with pytest.raises(ValueError):
            model = MultiviewKMeans(max_iter=bad_max_iter)
            model.fit(data_small)
def data_random():
    """Build random two-view fit and test data plus a seeded estimator.

    Returns
    -------
    dict
        Keys: ``'n_test'``, ``'n_feats1'``, ``'n_feats2'``,
        ``'n_clusters'``, ``'kmeans'``, ``'fit_data'`` and ``'test_data'``.
        ``'fit_data'`` and ``'test_data'`` are two-element lists of arrays
        with 200 and 5 rows respectively.
    """
    n_fit, n_test = 200, 5
    n_feats1, n_feats2 = 20, 18
    n_clusters = 2
    feature_counts = (n_feats1, n_feats2)

    # Seed the global NumPy RNG, then draw fit data before test data.
    np.random.seed(RANDOM_SEED)
    fit_data = [np.random.rand(n_fit, width) for width in feature_counts]
    test_data = [np.random.rand(n_test, width) for width in feature_counts]

    kmeans = MultiviewKMeans(n_clusters=n_clusters, random_state=RANDOM_SEED)

    return {
        'n_test': n_test,
        'n_feats1': n_feats1,
        'n_feats2': n_feats2,
        'n_clusters': n_clusters,
        'kmeans': kmeans,
        'fit_data': fit_data,
        'test_data': test_data,
    }
def test_predict_deterministic():
    """``predict`` with hand-set centroids returns the expected labels."""
    n_clusters = 2
    centroids = [np.array([[0, 0], [1, 1]]), np.array([[0, 0], [1, 1]])]
    data = [
        np.array([[0, 0], [0.3, 0.2], [0.5, 0.5], [0.7, 0.7], [1, 1]]),
        np.array([[0, 0], [0.2, 0.4], [0.5, 0.5], [0.4, 0.7], [1, 1]]),
    ]

    model = MultiviewKMeans(n_clusters=n_clusters)
    # Skip fitting: assign the centroids directly.
    model.centroids_ = centroids
    predicted = model.predict(data)

    expected = [0, 0, 0, 1, 1]
    for position, want in enumerate(expected):
        assert predicted[position] == want
def test_predict_no_centroids2():
    """``predict`` raises ``AttributeError`` after ``_final_centroids`` fails.

    ``_final_centroids`` is first expected to raise ``ConvergenceWarning``;
    the same estimator is then asked to predict.
    """
    model = MultiviewKMeans()
    first_view = np.array([[0, 1], [1, 0]])
    second_view = np.array([[1, 0], [0, 1]])
    start_centroids = [
        np.array([[0, 1], [1, 0]]),
        np.array([[0, 1], [1, 0]]),
    ]

    with pytest.raises(ConvergenceWarning):
        model._final_centroids([first_view, second_view], start_centroids)

    with pytest.raises(AttributeError):
        model.predict([first_view, second_view])
def test_patience_not_nonnegative_int(data_small):
    """A negative ``patience`` must raise ``ValueError``."""
    with pytest.raises(ValueError):
        MultiviewKMeans(patience=-1).fit(data_small)
def test_init_clusters_not_same(data_small):
    """``init`` views with different centroid counts raise ``ValueError``."""
    # Two centroids for the first view, three for the second.
    mismatched_init = [np.random.random((2, 8)), np.random.random((3, 9))]
    with pytest.raises(ValueError):
        model = MultiviewKMeans(init=mismatched_init)
        model.fit(data_small)
def test_samples_not_2D_2():
    """Fitting on two 1-D views must raise ``ValueError``."""
    flat_views = [np.random.random((10, )), np.random.random((10, ))]
    with pytest.raises(ValueError):
        model = MultiviewKMeans()
        model.fit(flat_views)
def test_init_samples_not_list(data_small):
    """Scalar entries in ``init`` must raise ``ValueError``."""
    scalar_init = [1, 3]
    with pytest.raises(ValueError):
        model = MultiviewKMeans(init=scalar_init)
        model.fit(data_small)
def test_tol_not_nonnegative_float(data_small):
    """A negative ``tol`` must raise ``ValueError``."""
    with pytest.raises(ValueError):
        MultiviewKMeans(tol=-0.05).fit(data_small)
def test_not_init1(data_small):
    """An unrecognized ``init`` string must raise ``ValueError``."""
    with pytest.raises(ValueError):
        MultiviewKMeans(init='Not_Init').fit(data_small)
def test_predict_not_fit():
    """``predict`` on an unfitted estimator raises ``NotFittedError``."""
    views = [np.random.random((10, 11)), np.random.random((10, 10))]
    with pytest.raises(NotFittedError):
        MultiviewKMeans().predict(views)
def test_random_state_not_convertible(data_small):
    """A string ``random_state`` of ``'ab'`` must raise ``ValueError``."""
    with pytest.raises(ValueError):
        MultiviewKMeans(random_state='ab').fit(data_small)
# Lay out and display the figure assembled earlier in the script
# (that code is outside this excerpt).
plt.tight_layout()
plt.show()

###############################################################################
# Cluster using Multiview KMeans
# ------------------------------
#
# We can compare the estimated clusters from
# :class:`mvlearn.cluster.MultiviewKMeans` to regular
# KMeans on each of the views. Multiview Kmeans clearly finds two clusters
# matching the two different genotype labels observed in the prior plots.

from mvlearn.cluster import MultiviewKMeans  # noqa: E402
from sklearn.cluster import KMeans  # noqa: E402

# Two-cluster labels from the multiview estimator on all views, and from
# plain KMeans on each of the first two views; all use random_state=0.
Xs_labels = MultiviewKMeans(n_clusters=2, random_state=0).fit_predict(Xs)
X1_labels = KMeans(n_clusters=2, random_state=0).fit_predict(Xs[0])
X2_labels = KMeans(n_clusters=2, random_state=0).fit_predict(Xs[1])

# Marker styling shared by the three scatter plots.
sca_kwargs = {'alpha': 0.7, 's': 20}
# Indexing this array with integer labels maps label 0 -> 'Red', 1 -> 'Blue'.
colors = np.asarray(['Red', 'Blue'])

# One panel per labeling. zip(*X_mvmds) splits the rows of X_mvmds into
# per-column coordinate sequences for scatter.
# NOTE(review): assumes X_mvmds has two columns per sample -- confirm.
f, axes = plt.subplots(1, 3, figsize=(8, 4))
axes[0].scatter(*zip(*X_mvmds), c=colors[Xs_labels], **sca_kwargs)
axes[0].set_title('Multiview Kmeans Clusters')
axes[1].scatter(*zip(*X_mvmds), c=colors[X1_labels], **sca_kwargs)
axes[1].set_title('View 1 Kmeans Clusters')
axes[2].scatter(*zip(*X_mvmds), c=colors[X2_labels], **sca_kwargs)
axes[2].set_title('View 2 Kmeans Clusters')

# Give every panel the same x-axis label.
for ax in axes:
    ax.set_xlabel('MVMDS component 1')
def test_samples_not_list():
    """Fitting on scalar "views" must raise ``ValueError``."""
    scalar_views = [1, 3]
    with pytest.raises(ValueError):
        model = MultiviewKMeans()
        model.fit(scalar_views)
# Concatenate the multiple views into a single view s_data = np.hstack(Xs) s_clusters = s_kmeans.fit_predict(s_data) # Compute nmi between true class labels and singleview cluster labels s_nmi_v1 = nmi_score(labels, s_clusters_v1) s_nmi_v2 = nmi_score(labels, s_clusters_v2) s_nmi = nmi_score(labels, s_clusters) print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1)) print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2)) print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi)) # Multiview kmeans clustering # Use the MultiviewKMeans instance to cluster the data m_kmeans = MultiviewKMeans(n_clusters=n_class, random_state=RANDOM_SEED) m_clusters = m_kmeans.fit_predict(Xs) # Compute nmi between true class labels and multiview cluster labels m_nmi = nmi_score(labels, m_clusters) print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi)) ############################################################################### # Comparing predicted cluster labels vs the truth # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # We will display the clustering results of the Multiview kmeans clustering # algorithm below, along with the true class labels. # Running TSNE to display clustering results via low dimensional embedding tsne = TSNE()
def test_init_samples_not_2D_2(data_small):
    """1-D arrays supplied as ``init`` must raise ``ValueError``."""
    flat_init = [np.random.random((2, )), np.random.random((2, ))]
    with pytest.raises(ValueError):
        model = MultiviewKMeans(init=flat_init)
        model.fit(data_small)