def test_samples_not_n_views():
    """Two views given to an estimator built with n_views=3 must fail.

    Both the construction and the ``fit_predict`` call sit inside the
    ``pytest.raises`` block, so a ValueError from either one satisfies
    the test.
    """
    views = [np.random.random((10, 11)), np.random.random((10, 10))]
    with pytest.raises(ValueError):
        estimator = MultiviewSpectralClustering(n_views=3,
                                                random_state=RANDOM_STATE)
        estimator.fit_predict(views)
def perform_clustering(seed, m_data, labels, n_clusters):
    """Compare single-view and multiview spectral clustering by NMI.

    Clusters the first two views separately, then the horizontal
    concatenation of all views, then all views jointly with
    ``MultiviewSpectralClustering``. The NMI of each result against
    ``labels`` is printed.

    Parameters
    ----------
    seed : int or None
        Random state passed to both clustering estimators. Previously
        this argument was ignored in favour of a module-level constant.
    m_data : sequence of array-like
        The views. Must contain at least two entries that ``np.hstack``
        can concatenate (presumably each is (n_samples, n_features_i) --
        confirm against the caller).
    labels : array-like
        Ground-truth class labels used for the NMI scores.
    n_clusters : int
        Number of clusters requested from every estimator.

    Returns
    -------
    m_clusters
        Cluster labels produced by the multiview estimator.
    """
    # Singleview spectral clustering: each view on its own, then the
    # concatenation of all views as one wide feature matrix.
    s_spectral = SpectralClustering(n_clusters=n_clusters,
                                    random_state=seed, n_init=100)
    s_clusters_v1 = s_spectral.fit_predict(m_data[0])
    s_clusters_v2 = s_spectral.fit_predict(m_data[1])

    s_data = np.hstack(m_data)
    s_clusters = s_spectral.fit_predict(s_data)

    # NMI between the true class labels and each singleview clustering.
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)

    print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multiview spectral clustering on the list of views.
    m_spectral = MultiviewSpectralClustering(n_clusters=n_clusters,
                                             random_state=seed,
                                             n_init=100)
    m_clusters = m_spectral.fit_predict(m_data)

    # NMI between the true class labels and the multiview clustering.
    m_nmi = nmi_score(labels, m_clusters)
    print('Multiview Concatenated NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
def test_n_neighbors_not_positive_int():
    """Constructing with n_neighbors of -1 or 0 must raise ValueError."""
    for bad_value in (-1, 0):
        with pytest.raises(ValueError):
            MultiviewSpectralClustering(affinity='nearest_neighbors',
                                        n_neighbors=bad_value)
def test_info_view_not_valid():
    """Constructing with info_view of -1 or 6 (n_views=5) must raise."""
    for bad_view in (-1, 6):
        with pytest.raises(ValueError):
            MultiviewSpectralClustering(n_clusters=2, n_views=5,
                                        info_view=bad_view)
def test_fit_predict_default(data):
    """fit_predict on the first two views yields one valid label per sample.

    Every predicted label must lie in ``[0, data['n_clusters'])``.
    """
    first_two_views = data['fit_data'][:2]
    estimator = MultiviewSpectralClustering(2, random_state=RANDOM_STATE)
    labels = estimator.fit_predict(first_two_views)

    upper = data['n_clusters']
    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < upper for label in labels)
def test_n_iter_not_positive_int():
    """Constructing with n_iter of -1 or 0 must raise ValueError.

    Only the constructor call is exercised; no data is needed, so no
    random views are generated here.
    """
    for bad_value in (-1, 0):
        with pytest.raises(ValueError):
            MultiviewSpectralClustering(2, n_iter=bad_value)
def test_fit_predict_info_view(data):
    """fit_predict with a randomly chosen info_view yields valid labels.

    The info_view index is drawn from ``range(len(views))``; the result
    must have one label per sample, each in ``[0, n_clusters)``.
    """
    views = data['fit_data']
    chosen_view = np.random.randint(len(views))
    upper = data['n_clusters']

    estimator = MultiviewSpectralClustering(upper,
                                            random_state=RANDOM_STATE,
                                            info_view=chosen_view)
    labels = estimator.fit_predict(views)

    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < upper for label in labels)
def test_fit_predict_max_iter(data):
    """fit_predict with max_iter=5 yields one valid label per sample."""
    views = data['fit_data']
    upper = data['n_clusters']

    estimator = MultiviewSpectralClustering(upper,
                                            random_state=RANDOM_STATE,
                                            max_iter=5)
    labels = estimator.fit_predict(views)

    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < upper for label in labels)
def test_info_view_not_valid(small_data):
    """info_view of -1 or 6 must raise ValueError by the end of fitting.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    for bad_view in (-1, 6):
        with pytest.raises(ValueError):
            estimator = MultiviewSpectralClustering(n_clusters=2,
                                                    info_view=bad_view)
            estimator.fit_predict(small_data)
def test_n_init_not_positive_int(small_data):
    """n_init of -1 or 0 must raise ValueError by the end of fitting.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    for bad_value in (-1, 0):
        with pytest.raises(ValueError):
            estimator = MultiviewSpectralClustering(n_init=bad_value)
            estimator.fit_predict(small_data)
def test_fit_predict_n_iter(data):
    """fit_predict with n_iter=5 yields one valid label per sample."""
    views = data['fit_data']
    view_count = data['n_views']
    upper = data['n_clusters']

    estimator = MultiviewSpectralClustering(upper, n_views=view_count,
                                            n_iter=5)
    labels = estimator.fit_predict(views)

    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < upper for label in labels)
def test_not_valid_affinity(small_data):
    """affinity of 'What' or None must raise ValueError by end of fitting.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    for bad_affinity in ('What', None):
        with pytest.raises(ValueError):
            estimator = MultiviewSpectralClustering(affinity=bad_affinity)
            estimator.fit_predict(small_data)
def test_gamma_not_positive_float(small_data):
    """gamma of -1.5 or 0 must raise ValueError by the end of fitting.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    for bad_gamma in (-1.5, 0):
        with pytest.raises(ValueError):
            estimator = MultiviewSpectralClustering(gamma=bad_gamma)
            estimator.fit_predict(small_data)
def test_fit_predict_info_view(data):
    """fit_predict with a random info_view below n_views yields valid labels.

    The chosen values are printed so a failing run shows which view was
    selected.
    """
    views = data['fit_data']
    n_views = data['n_views']
    info_view = np.random.randint(n_views)
    print('n_views is ' + str(n_views))
    print('info_views is ' + str(info_view))

    upper = data['n_clusters']
    estimator = MultiviewSpectralClustering(upper, n_views=n_views,
                                            info_view=info_view)
    labels = estimator.fit_predict(views)

    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < upper for label in labels)
def data():
    """Build a seeded three-view random dataset and a configured estimator.

    Seeds NumPy's global RNG with ``RANDOM_STATE``, then draws three
    uniform random views of 200 samples with 20, 18 and 30 features.

    Returns
    -------
    dict
        Sample count, per-view feature counts, cluster count, the
        estimator instance, the list of views and the number of views.
    """
    num_fit_samples = 200
    feature_counts = (20, 18, 30)
    n_feats1, n_feats2, n_feats3 = feature_counts
    n_clusters = 2
    n_views = 3

    np.random.seed(RANDOM_STATE)
    # Views are drawn in order so the RNG stream matches view 1, 2, 3.
    fit_data = [np.random.rand(num_fit_samples, width)
                for width in feature_counts]

    spectral = MultiviewSpectralClustering(n_clusters, n_views=n_views,
                                           random_state=RANDOM_STATE)
    return {
        'n_fit': num_fit_samples,
        'n_feats1': n_feats1,
        'n_feats2': n_feats2,
        'n_feats3': n_feats3,
        'n_clusters': n_clusters,
        'spectral': spectral,
        'fit_data': fit_data,
        'n_views': n_views,
    }
def test_n_neighbors_not_positive_int(small_data):
    """n_neighbors of -1 or 0 must raise ValueError by the end of fitting.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    for bad_value in (-1, 0):
        with pytest.raises(ValueError):
            estimator = MultiviewSpectralClustering(
                affinity='nearest_neighbors', n_neighbors=bad_value)
            estimator.fit_predict(small_data)
def test_affinity_mat_poly(data):
    """The 'poly' affinity matrix must match sklearn's polynomial kernel.

    The reference kernel uses gamma = 1 / (2 * median pairwise
    distance ** 2); presumably this mirrors the estimator's default
    gamma -- confirm against the implementation.
    """
    first_view = data['fit_data'][0]
    median_distance = np.median(cdist(first_view, first_view))
    expected = polynomial_kernel(first_view,
                                 gamma=1 / (2 * median_distance**2))

    estimator = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                            affinity='poly')
    actual = estimator._affinity_mat(first_view)

    n_fit = data['n_fit']
    assert actual.shape[0] == n_fit
    assert actual.shape[1] == n_fit
    # Same absolute tolerance as an entry-by-entry check, in one pass.
    assert np.all(np.abs(expected - actual) < 0.000001)
def test_affinity_mat_rbf2(data):
    """An explicitly supplied gamma must be used for the affinity matrix.

    The estimator is built with ``gamma=1`` and its affinity matrix for
    the first view is compared with ``rbf_kernel`` at that same gamma
    (assumes the estimator's default affinity is RBF -- confirm against
    the implementation).
    """
    first_view = data['fit_data'][0]
    gamma = 1

    estimator = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                            gamma=gamma)
    # The reference must use the same gamma that was given to the estimator.
    expected = rbf_kernel(first_view, gamma=gamma)
    actual = estimator._affinity_mat(first_view)

    n_fit = data['n_fit']
    assert actual.shape[0] == n_fit
    assert actual.shape[1] == n_fit
    assert np.all(np.abs(expected - actual) < 0.000001)
def test_affinity_neighbors(data):
    """The nearest-neighbors affinity must match sklearn's kneighbors graph.

    The reference is the dense k-neighbors graph of the first view with
    10 neighbors; the estimator is configured with the same count.
    """
    first_view = data['fit_data'][0]
    n_neighbors = 10

    knn = NearestNeighbors(n_neighbors=n_neighbors)
    knn.fit(first_view)
    expected = knn.kneighbors_graph(first_view).toarray()

    estimator = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                            affinity='nearest_neighbors',
                                            n_neighbors=n_neighbors)
    actual = estimator._affinity_mat(first_view)

    n_fit = data['n_fit']
    assert actual.shape[0] == n_fit
    assert actual.shape[1] == n_fit
    # Same absolute tolerance as an entry-by-entry check, in one pass.
    assert np.all(np.abs(expected - actual) < 0.000001)
def test_samples_not_2D_1():
    """Three-dimensional views must be rejected with ValueError.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    views = [np.random.random((5, 8, 7)), np.random.random((5, 9, 7))]
    with pytest.raises(ValueError):
        estimator = MultiviewSpectralClustering(random_state=RANDOM_STATE)
        estimator.fit_predict(views)
def test_samples_not_list():
    """Scalar 'views' (1 and 3) must be rejected with ValueError.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    scalar_views = [1, 3]
    with pytest.raises(ValueError):
        estimator = MultiviewSpectralClustering(random_state=RANDOM_STATE)
        estimator.fit_predict(scalar_views)
def test_random_state_not_convertible():
    """Constructing with random_state='ab' must raise ValueError."""
    bad_state = 'ab'
    with pytest.raises(ValueError):
        MultiviewSpectralClustering(n_clusters=5, random_state=bad_state)
def test_n_views_not_positive_int():
    """Constructing with n_views of -1 or 0 must raise ValueError."""
    for bad_value in (-1, 0):
        with pytest.raises(ValueError):
            MultiviewSpectralClustering(n_clusters=5, n_views=bad_value)
def test_samples_not_list():
    """Scalar 'views' (1 and 3) must be rejected with ValueError.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    scalar_views = [1, 3]
    with pytest.raises(ValueError):
        estimator = MultiviewSpectralClustering(2)
        estimator.fit_predict(scalar_views)
def test_max_iter_not_positive_int():
    """Constructing with max_iter of -1 or 0 must raise ValueError."""
    for bad_value in (-1, 0):
        with pytest.raises(ValueError):
            MultiviewSpectralClustering(max_iter=bad_value)
def test_gamma_not_positive_float():
    """Constructing with gamma of -1.5 or 0 must raise ValueError."""
    for bad_gamma in (-1.5, 0):
        with pytest.raises(ValueError):
            MultiviewSpectralClustering(gamma=bad_gamma)
def test_not_valid_affinity():
    """Constructing with affinity 'What' or None must raise ValueError."""
    for bad_affinity in ('What', None):
        with pytest.raises(ValueError):
            MultiviewSpectralClustering(affinity=bad_affinity)
def test_samples_not_2D_2():
    """One-dimensional views must be rejected with ValueError.

    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    views = [np.random.random((10, )), np.random.random((10, ))]
    with pytest.raises(ValueError):
        estimator = MultiviewSpectralClustering(2)
        estimator.fit_predict(views)
def test_n_init_not_positive_int():
    """Constructing with n_init of -1 or 0 must raise ValueError."""
    for bad_value in (-1, 0):
        with pytest.raises(ValueError):
            MultiviewSpectralClustering(n_init=bad_value)
def test_n_views_too_small2(small_data):
    """fit_predict on an empty list of views must raise ValueError.

    The ``small_data`` argument is accepted but not used by this test.
    Construction and ``fit_predict`` are both inside the raises block,
    so the error may come from either step.
    """
    no_views = []
    with pytest.raises(ValueError):
        estimator = MultiviewSpectralClustering(random_state=RANDOM_STATE)
        estimator.fit_predict(no_views)