def test_spectral_clustering_not_infinite_loop(capsys, monkeypatch):
    """Ensure discretize gives up with LinAlgError if svd keeps failing.

    Non-regression test for #21380
    """

    def _svd_that_always_fails(*_args, **_kwargs):
        # Stand-in for np.linalg.svd that fails on every call.
        raise LinAlgError

    # Every np.linalg.svd call made during the test now fails.
    monkeypatch.setattr(np.linalg, "svd", _svd_that_always_fails)

    all_ones = np.ones(shape=(10, 4))
    expected_failure = pytest.raises(LinAlgError, match="SVD did not converge")
    with expected_failure:
        discretize(all_ones)
def __regularized_spectral_clustering(adj_matrix, tau, n_clusters, algo="scan"):
    """
    Cluster a graph using the eigenvectors of its regularized Laplacian.

    :param adj_matrix: adjacency matrix representation of graph where [m][n] > 0 if there is
        an edge and [m][n] = weight
    :param tau: value forwarded to ``__regularized_laplacian_matrix`` (presumably the
        regularization strength; confirm against that helper)
    :param n_clusters: cluster partitioning constant
    :param algo: the clustering separation algorithm; "kmeans++" runs k-means on the
        eigenvectors, any other value (default "scan") splits on the sign of the second
        eigenvector when ``n_clusters == 2`` and calls ``discretize`` otherwise
    :return: labels, number of clustering iterations needed, size of the smallest cluster found
    """
    from sklearn.cluster import k_means
    from sklearn.cluster._spectral import discretize

    regularized_laplacian = __regularized_laplacian_matrix(adj_matrix, tau)
    eigen_values, eigen_vectors = __eigen_solver(regularized_laplacian, n_clusters=n_clusters)

    if algo == "kmeans++":
        _, labels, _, num_iterations = k_means(
            eigen_vectors, n_clusters=n_clusters, return_n_iter=True
        )
    elif n_clusters == 2:
        # Cluster based on sign, using only the eigenvector that belongs to the
        # second-smallest eigenvalue.
        second_eigen_vector_index = np.argsort(eigen_values)[1]
        second_eigen_vector = eigen_vectors.T[second_eigen_vector_index]
        labels = [0 if val <= 0 else 1 for val in second_eigen_vector]
        num_iterations = 1
    else:
        # Bisecting it into k-ways, use all eigenvectors.
        labels = discretize(eigen_vectors)
        # Only the labels are taken from discretize, so assume the worst case
        # scenario that it took 20 restarts.
        num_iterations = 20

    # Count the members of every cluster. The previous sum-based formula only
    # produced a cluster size for 0/1 labels; with more than two clusters it
    # summed label ids instead. ``minlength`` keeps empty clusters as size 0.
    cluster_sizes = np.bincount(np.asarray(labels, dtype=np.intp), minlength=n_clusters)
    smallest_cluster_size = cluster_sizes.min() if cluster_sizes.size else 0
    return labels, num_iterations, smallest_cluster_size
def test_discretize(n_samples):
    """Check that discretize recovers class labels from a noisy assignment matrix.

    For each class count, random labels are turned into a one-hot indicator
    matrix, Gaussian noise is added, and the labels predicted by ``discretize``
    must reach an adjusted Rand score above 0.8 against the true labels.

    :param n_samples: number of samples (rows) in the generated assignment matrix
    """
    random_state = np.random.RandomState(seed=8)
    for n_class in range(2, 10):
        # random class labels
        y_true = random_state.randint(0, n_class + 1, n_samples)
        # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
        # same dtype.
        y_true = np.array(y_true, float)
        # noise class assignment matrix
        y_indicator = sparse.coo_matrix(
            (np.ones(n_samples), (np.arange(n_samples), y_true)),
            shape=(n_samples, n_class + 1),
        )
        y_true_noisy = y_indicator.toarray() + 0.1 * random_state.randn(
            n_samples, n_class + 1
        )
        # Pass the seed by keyword: positionally it would land in the second
        # parameter of discretize (``copy``) instead of ``random_state``.
        y_pred = discretize(y_true_noisy, random_state=random_state)
        assert adjusted_rand_score(y_true, y_pred) > 0.8