def test_spectral_eigen_tol_auto(monkeypatch, solver):
    """Test that `eigen_tol="auto"` is resolved correctly"""
    if solver == "amg" and not pyamg_available:
        pytest.skip("PyAMG is not available.")
    X, _ = make_blobs(n_samples=200,
                      random_state=0,
                      centers=[[1, 1], [-1, -1]],
                      cluster_std=0.01)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix

    solver_func = eigsh if solver == "arpack" else lobpcg
    default_value = 0 if solver == "arpack" else None
    if solver == "amg":
        S = sparse.csr_matrix(S)

    mocked_solver = Mock(side_effect=solver_func)

    monkeypatch.setattr(_spectral_embedding, solver_func.__qualname__,
                        mocked_solver)

    spectral_embedding(S,
                       random_state=42,
                       eigen_solver=solver,
                       eigen_tol="auto")
    mocked_solver.assert_called()

    _, kwargs = mocked_solver.call_args
    assert kwargs["tol"] == default_value
Example #2
def test_spectral_embedding_deterministic():
    # Test that Spectral Embedding is deterministic
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    embedding_1 = spectral_embedding(sims)
    embedding_2 = spectral_embedding(sims)
    assert_array_almost_equal(embedding_1, embedding_2)
Example #4
def __sklearn_spectral_clustering(adj_matrix, n_clusters):
    """
    :param adj_matrix: adjacency matrix representation of the graph, where [m][n] > 0 if there is an edge and [m][n] is its weight
    :param n_clusters: cluster partitioning constant
    :return: labels, number of clustering iterations needed, size of the smallest cluster found
    """
    from sklearn.cluster import k_means
    from sklearn.neighbors import kneighbors_graph
    from sklearn.manifold import spectral_embedding

    connectivity = kneighbors_graph(adj_matrix,
                                    n_neighbors=10,
                                    include_self=True)
    affinity_matrix_ = 0.5 * (connectivity + connectivity.T)

    eigen_vectors = spectral_embedding(
        affinity_matrix_,
        n_components=n_clusters,
        eigen_solver="arpack",
        eigen_tol=0.0,
        norm_laplacian=True,
        drop_first=False,
    )

    _, labels, _, num_iterations = k_means(eigen_vectors,
                                           n_clusters=n_clusters,
                                           return_n_iter=True)

    smallest_cluster_size = min(np.sum(labels),
                                abs(np.sum(labels) - labels.size))
    return labels, num_iterations, smallest_cluster_size
Example #5
def spectral_clustering_scores(train_test_split, random_state=0):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
        test_edges, test_edges_false = train_test_split  # Unpack input

    start_time = time.time()
    sc_scores = {}

    # Perform spectral clustering link prediction
    spectral_emb = spectral_embedding(adj_train, n_components=16, random_state=random_state)
    sc_score_matrix = np.dot(spectral_emb, spectral_emb.T)

    runtime = time.time() - start_time
    sc_test_roc, sc_test_ap = get_roc_score(test_edges, test_edges_false, sc_score_matrix, apply_sigmoid=True)
    sc_val_roc, sc_val_ap = get_roc_score(val_edges, val_edges_false, sc_score_matrix, apply_sigmoid=True)

    # Record scores
    sc_scores['test_roc'] = sc_test_roc
    # sc_scores['test_roc_curve'] = sc_test_roc_curve
    sc_scores['test_ap'] = sc_test_ap

    sc_scores['val_roc'] = sc_val_roc
    # sc_scores['val_roc_curve'] = sc_val_roc_curve
    sc_scores['val_ap'] = sc_val_ap

    sc_scores['runtime'] = runtime
    return sc_scores
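
The score matrix above ranks candidate node pairs by inner products of their spectral embeddings. A minimal sketch of scoring a single candidate edge, assuming apply_sigmoid=True means squashing the raw inner product through a logistic function (edge_score is an illustrative helper, not part of the original code):

import numpy as np

def edge_score(emb, u, v):
    # Inner-product similarity of the two node embeddings,
    # squashed into (0, 1) with a logistic sigmoid.
    return 1.0 / (1.0 + np.exp(-emb[u] @ emb[v]))

# e.g. edge_score(spectral_emb, 0, 1)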
Example #6
File: SCY.py Project: bang77/SCRNA
def spectral_clustering(affinity,
                        n_clusters=8,
                        n_components=None,
                        eigen_solver=None,
                        random_state=None,
                        n_init=10,
                        eigen_tol=0.0,
                        assign_labels='kmeans'):

    if assign_labels not in ('kmeans', 'discretize',
                             'AgglomerativeClustering'):
        raise ValueError("The 'assign_labels' parameter should be "
                         "'kmeans', 'discretize' or "
                         "'AgglomerativeClustering', but '%s' was given" %
                         assign_labels)

    random_state = check_random_state(random_state)
    n_components = n_clusters if n_components is None else n_components
    maps = spectral_embedding(affinity,
                              n_components=n_components,
                              eigen_solver=eigen_solver,
                              random_state=random_state,
                              eigen_tol=eigen_tol,
                              drop_first=False)
    if assign_labels == 'kmeans':
        _, labels, _ = k_means(maps, n_clusters)
    else:
        # both 'discretize' and 'AgglomerativeClustering' fall through
        # to discretize() here
        labels = discretize(maps, random_state=random_state)
    return labels
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : ndarray, shape (n_trials, n_channels, n_channels)
            ndarray of SPD matrices.

        Returns
        -------
        self : object
            Returns the instance itself.

        """
        affinity_matrix = self._get_affinity_matrix(X, self.eps)
        embd = spectral_embedding(adjacency=affinity_matrix,
                                  n_components=self.n_components,
                                  norm_laplacian=True)

        # normalize the embedding between -1 and +1
        embdn = 2*(embd - embd.min(0)) / embd.ptp(0) - 1

        self.embedding_ = embdn

        return self
Example #8
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : ndarray, shape (n_matrices, n_channels, n_channels)
            Set of SPD matrices.
        y : None
            Not used, here for compatibility with sklearn API.

        Returns
        -------
        self : object
            Returns the instance itself.

        """
        _check_dimensions(X, n_components=self.n_components)

        affinity_matrix = self._get_affinity_matrix(X, self.eps)
        embd = spectral_embedding(adjacency=affinity_matrix,
                                  n_components=self.n_components,
                                  norm_laplacian=True)

        # normalize the embedding between -1 and +1
        embdn = 2 * (embd - embd.min(0)) / embd.ptp(0) - 1

        self.embedding_ = embdn

        return self
Example #10
def order_func(times, data):
    this_data = data[:, (times > 0.0) & (times < 0.350)]
    this_data /= np.sqrt(np.sum(this_data**2, axis=1))[:, np.newaxis]
    return np.argsort(
        spectral_embedding(rbf_kernel(this_data, gamma=1.),
                           n_components=1,
                           random_state=0).ravel())
Example #11
def matrix_factorization(model_name):
    """generate embedding by laplacian eigenmaps"""

    embedding = spectral_embedding(nx.adjacency_matrix(ut.graph), n_components=64)

    # save embedding
    np.save(ut.embedding_path + model_name + "_embedding.npy", embedding)
    return embedding
Example #12
def test_spectral_embedding_amg_solver_failure(dtype, seed=36):
    # Non-regression test for amg solver failure (issue #13393 on github)
    num_nodes = 100
    X = sparse.rand(num_nodes, num_nodes, density=0.1, random_state=seed)
    X = X.astype(dtype)
    upper = sparse.triu(X) - sparse.diags(X.diagonal())
    sym_matrix = upper + upper.T
    embedding = spectral_embedding(
        sym_matrix, n_components=10, eigen_solver="amg", random_state=0
    )

    # Check that the learned embedding is stable w.r.t. random solver init:
    for i in range(3):
        new_embedding = spectral_embedding(
            sym_matrix, n_components=10, eigen_solver="amg", random_state=i + 1
        )
        _assert_equal_with_sign_flipping(embedding, new_embedding, tol=0.05)
Example #13
def get_spectral_coords(ad, n_dim = 2, epsilon = 1e-14):
  max_val = np.amax(ad)
  # ad = (max_val + epsilon) - ad
  # ad = 1/(ad + epsilon)
  nonzero = np.nonzero(ad)
  ad[nonzero] = (max_val + epsilon) - ad[nonzero]

  # print(ad)
  return manifold.spectral_embedding(ad, n_components=n_dim)
def spectral_embedding(graph, num_sets):
    embedding = manifold.spectral_embedding(graph,
                                            n_components=2 * num_sets,
                                            eigen_solver=None,
                                            random_state=None,
                                            eigen_tol=0.0,
                                            norm_laplacian=False,
                                            drop_first=True)
    embedding = normalize(embedding, axis=0, norm='l2')
    return embedding
Example #15
def test_spectral_embedding_copy_variable(seed=36):
    # Test that when "copy" input variable is set to False
    # spectral_embedding returns the correct value
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims,
                                     n_components=n_components,
                                     drop_first=False,
                                     copy=False)

    # Verify with copy True or False
    embedding_2 = spectral_embedding(sims,
                                     n_components=n_components,
                                     drop_first=False,
                                     copy=True)

    assert_array_almost_equal(embedding_1, embedding_2)
def fit_lumap(X, n_neighbors, metric, n_components=2):
    """
    Build the fuzzy simplices UMAP-style (via fuzzy unions of local metric spaces) and then
        fit the matrix Laplacian Eigenmaps style (via graph laplacian)  
    """
    sparse_graph, sigmas, rhos = fuzzy_simplicial_set(
        X=X,
        random_state=check_random_state(0),
        n_neighbors=n_neighbors,
        metric=metric)
    return spectral_embedding(sparse_graph, n_components=n_components)
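
A usage sketch of fit_lumap (the iris data and euclidean metric are illustrative choices, not from the original):

from sklearn.datasets import load_iris

X = load_iris().data
emb = fit_lumap(X, n_neighbors=15, metric="euclidean")
print(emb.shape)  # (150, 2)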
Example #17
def node_features(graph, k):
    '''
    Description: compute the k-dimensional node features of the given graph
    Input: the graph whose node features are required
    Output: the feature of each node in the graph
    '''
    adj_matrix = graph_tool.spectral.adjacency(graph, weight=None, index=None)
    adj_matrix = adj_matrix.todense()
    print(adj_matrix.shape)
    node_feature = spectral_embedding(adj_matrix, k)
    return node_feature
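
A usage sketch of node_features (the lattice graph is illustrative; requires the graph-tool package):

import graph_tool.all as gt

g = gt.lattice([5, 5])       # 5x5 grid graph, connected
feats = node_features(g, 4)  # (25, 4) feature matrix, one row per node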
Example #18
def spectral_clustering(affinity,
                        n_clusters=8,
                        n_components=None,
                        eigen_solver=None,
                        random_state=None,
                        n_init=10,
                        eigen_tol=0.0,
                        assign_labels='kmeans',
                        fuzzy_m=2,
                        fuzzy_error=0.0005,
                        fuzzy_maxiter=10000,
                        fuzzy_label_threshold=None):
    if assign_labels not in ('kmeans', 'fuzzy_cmeans', 'discretize'):
        raise ValueError(
            "The 'assign_labels' parameter should be "
            "'kmeans', 'fuzzy_cmeans' or 'discretize', but '%s' was given" %
            assign_labels)

    random_state_ = sp.check_random_state(random_state)
    n_components = n_clusters if n_components is None else n_components
    maps = spectral_embedding(affinity,
                              n_components=n_components,
                              eigen_solver=eigen_solver,
                              random_state=random_state,
                              eigen_tol=eigen_tol,
                              drop_first=False)

    if assign_labels == 'kmeans':
        _, labels, _ = sp.k_means(maps,
                                  n_clusters,
                                  random_state=random_state_,
                                  n_init=n_init)
    elif assign_labels == 'fuzzy_cmeans':
        if fuzzy_label_threshold is None:
            fuzzy_label_threshold = 1. / n_clusters

        _, u, _, _, _, _, _ = fuzz.cluster.cmeans(np.exp(maps.T),
                                                  n_clusters,
                                                  seed=random_state,
                                                  m=fuzzy_m,
                                                  error=fuzzy_error,
                                                  maxiter=fuzzy_maxiter)
        # from sklearn.mixture import GMM
        # gmm = GMM(n_components=n_clusters, covariance_type='full', random_state=random_state, n_init=n_init).fit(maps)
        # u = gmm.predict_proba(maps)
        # u = u.T
        assignments = np.argwhere(u.T >= fuzzy_label_threshold)
        labels = [[] for _ in range(u.shape[1])]
        for row in assignments:
            labels[row[0]].append(row[1])
    else:
        labels = sp.discretize(maps, random_state=random_state_)

    return labels
Example #19
    def spectral_embedding(self, n):
        """
        Embed the points using spectral decomposition of the laplacian of
        the affinity matrix

        Parameters
        ----------
        n:      int
                The number of dimensions
        """
        coords = spectral_embedding(self._affinity, n)
        return CoordinateMatrix(normalise_rows(coords))
Example #20
    def embed(self, save=False):
        try:
            embedding = np.load("./results2/embedding.npy")
        except OSError:
            G = self.indexedGraph()
            # starting manifold learning...
            A = nx.adjacency_matrix(G, nodelist=G.nodes())
            embedding = manifold.spectral_embedding(A, n_components=self.embedding_n)
            if save:
                np.save("./results2/embedding.npy", embedding)
        self.embedding = embedding
        return embedding
Example #22
def my_spectral(graph):
    ad = nx.to_numpy_array(graph)
    a = manifold.spectral_embedding(ad, n_components=2)
    xs = a[:, 0]
    ys = a[:, 1]

    plt.scatter(xs, ys)

    for i in range(len(xs)):
        plt.annotate(i, (xs[i], ys[i]))

    plt.show()
Example #23
    def test_normalized_embedding(self):
        x = np.array([[1, 0], [0, 1], [3, 0], [4, 1]])
        sc = SpectralClustering(2)
        sc.affinity_matrix_ = sc._get_affinity_matrix(x)

        embedding_features_standard = spectral_embedding(sc.affinity_matrix_, n_components=2,
                                                         norm_laplacian=True, drop_first=False)
        embedding_features = sc._get_embedding(norm_laplacian=True)
        all_one_vector = embedding_features[:, 0] / embedding_features[0, 0]
        assert_array_almost_equal(all_one_vector, np.ones(4))
        second_vector = embedding_features[:, 1] / embedding_features[0, 1]
        second_vector_standard = embedding_features_standard[:, 1] / embedding_features_standard[0, 1]
        assert_array_almost_equal(second_vector, second_vector_standard)
def test_spectral_embedding_amg_solver_failure():
    # Non-regression test for amg solver failure (issue #13393 on github)
    pytest.importorskip('pyamg')
    seed = 36
    num_nodes = 100
    X = sparse.rand(num_nodes, num_nodes, density=0.1, random_state=seed)
    upper = sparse.triu(X) - sparse.diags(X.diagonal())
    sym_matrix = upper + upper.T
    embedding = spectral_embedding(sym_matrix,
                                   n_components=10,
                                   eigen_solver='amg',
                                   random_state=0)

    # Check that the learned embedding is stable w.r.t. random solver init:
    for i in range(3):
        new_embedding = spectral_embedding(sym_matrix,
                                           n_components=10,
                                           eigen_solver='amg',
                                           random_state=i + 1)
        assert _check_with_col_sign_flipping(embedding,
                                             new_embedding,
                                             tol=0.05)
Example #25
def fit_laplacian_eigenmaps(X,
                            n_neighbors=20,
                            metric=EUCLIDEAN,
                            n_components=2):
    """
    spectral_embedding expects an affinity matrix that already has the similarity kernel applied.
        We apply the exponent here to be consistent with the mapping from distances to fuzzy
        simplices (affinities) via -log
    """
    print("Computing adjacency_matrix with {} neighbors and {} metric".format(
        n_neighbors, metric))
    graph = get_adjacency_matrix(X=X, n_neighbors=n_neighbors, metric=metric)
    print("Computing spectral_embedding")
    return spectral_embedding(graph, n_components=n_components)
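
The docstring above stresses that spectral_embedding expects affinities rather than raw distances. A minimal sketch of that conversion with a heat kernel (the dense distance matrix and the gamma bandwidth are illustrative, not part of the original code):

import numpy as np
from sklearn.manifold import spectral_embedding
from sklearn.metrics import pairwise_distances

X = np.random.RandomState(0).randn(50, 5)
D = pairwise_distances(X)     # raw pairwise distances
gamma = 1.0 / D.std()         # illustrative bandwidth choice
A = np.exp(-gamma * D)        # symmetric heat-kernel affinities
emb = spectral_embedding(A, n_components=2, random_state=0)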
def test_spectral_embedding_unnormalized():
    # Test that spectral_embedding is also processing unnormalized laplacian correctly
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims, norm_laplacian=False, n_components=n_components, drop_first=False)

    # Verify using manual computation with dense eigh
    laplacian, dd = graph_laplacian(sims, normed=False, return_diag=True)
    _, diffusion_map = eigh(laplacian)
    embedding_2 = diffusion_map.T[:n_components] * dd
    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T

    assert_array_almost_equal(embedding_1, embedding_2)
Example #27
    def test_spectral_embedding(self):
        N = 10
        m = np.random.random_integers(50, 200, size=(N, N))
        m = (m + m.T) / 2

        df = pdml.ModelFrame(m)
        self.assert_numpy_array_almost_equal(df.data.values, m)

        result = df.manifold.spectral_embedding(random_state=self.random_state)
        expected = manifold.spectral_embedding(m, random_state=self.random_state)

        self.assertTrue(isinstance(result, pdml.ModelFrame))
        self.assert_index_equal(result.index, df.index)
        # signs can be inversed
        self.assert_numpy_array_almost_equal(np.abs(result.data.values),
                                             np.abs(expected))
Example #28
    def test_spectral_embedding(self):
        N = 10
        m = np.random.random_integers(50, 200, size=(N, N))
        m = (m + m.T) / 2

        df = pdml.ModelFrame(m)
        self.assert_numpy_array_almost_equal(df.data.values, m)

        result = df.manifold.spectral_embedding(random_state=self.random_state)
        expected = manifold.spectral_embedding(m, random_state=self.random_state)

        self.assertIsInstance(result, pdml.ModelFrame)
        self.assert_index_equal(result.index, df.index)
        # signs can be inversed
        self.assert_numpy_array_almost_equal(np.abs(result.data.values),
                                             np.abs(expected))
Example #29
def spectral_clustering_scores(train_test_split, random_state=0):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split
    start_time = time.time()
    sc_score = {}

    # spectral clustering
    spectral_emb = spectral_embedding(adj_train,
                                      n_components=16,
                                      random_state=random_state)
    sc_score_matrix = np.dot(spectral_emb, spectral_emb.T)

    train_edges_corr, train_edges_label = get_correlation(
        train_edges, train_edges_false, sc_score_matrix)
    if len(val_edges) > 0 and len(val_edges_false) > 0:
        val_edges_corr, val_edges_label = get_correlation(
            val_edges, val_edges_false, sc_score_matrix)
    test_edges_corr, test_edges_label = get_correlation(
        test_edges, test_edges_false, sc_score_matrix)

    classifier = get_prediction_model(train_edges_corr, train_edges_label)
    if len(val_edges) > 0 and len(val_edges_false) > 0:
        val_preds = classifier.predict(val_edges_corr)
    test_preds = classifier.predict(test_edges_corr)


    if len(val_edges) > 0 and len(val_edges_false) > 0:
        sc_val_roc = roc_auc_score(val_edges_label, val_preds)
        sc_val_avg = average_precision_score(val_edges_label, val_preds)
    else:
        sc_val_roc = None
        sc_val_avg = None

    sc_test_roc = roc_auc_score(test_edges_label, test_preds)
    sc_test_avg = average_precision_score(test_edges_label, test_preds)

    run_time = time.time() - start_time
    # sc_test_roc, sc_test_ap = get_roc_score(test_edges, test_edges_false, sc_score_matrix, apply_sigmoid=True)
    # sc_val_roc, sc_val_ap = get_roc_score(val_edges, val_edges_false, sc_score_matrix, apply_sigmoid=True)

    sc_score['test_roc'] = sc_test_roc
    sc_score['test_ap'] = sc_test_avg
    sc_score['val_roc'] = sc_val_roc
    sc_score['val_ap'] = sc_val_avg
    sc_score['run_time'] = run_time
    return sc_score
Example #30
def test_spectral_embedding_first_eigen_vector():
    # Test that the first eigenvector of spectral_embedding
    # is constant and that the second is not (for a connected graph)
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 2

    for seed in range(10):
        embedding = spectral_embedding(sims,
                                       norm_laplacian=False,
                                       n_components=n_components,
                                       drop_first=False,
                                       random_state=seed)

        assert np.std(embedding[:, 0]) == pytest.approx(0)
        assert np.std(embedding[:, 1]) > 1e-3
Example #32
def le(data, k=10, target_dim=2):
    graph = KNN.knn(data, k)
    A = construct_mesh(data, graph)
    from sklearn import manifold
    return manifold.spectral_embedding(A, n_components=target_dim)

    # NOTE: everything below the return is unreachable; it is a manual
    # Laplacian-eigenmaps implementation kept for reference.
    D = construct_degree(A)
    L = D - A

    eigvals, eigvecs = scipy.linalg.eigh(A, L)

    index = np.argsort(eigvals)[::-1]
    eigvals = eigvals[index]
    eigvecs = eigvecs[:, index]

    return eigvecs[:, 1:target_dim + 1]

# print(le(npdata))
def initial_dictionary(
    n_clusters,
    X,
):
    """Creat initial dictionary"""
    from sklearn.cluster import MiniBatchKMeans
    kmeans = MiniBatchKMeans(n_clusters=n_clusters,
                             random_state=0,
                             batch_size=200,
                             n_init=10)
    kmeans = kmeans.fit(X.T)
    dictionary_ = kmeans.cluster_centers_
    dictionary = (dictionary_.T / np.sqrt((dictionary_**2).sum(1))).T
    similarity = np.exp(np.corrcoef(dictionary))
    embedding = spectral_embedding(similarity, n_components=1)
    order = np.argsort(embedding.T).ravel()
    dictionary = dictionary[order]
    return dictionary
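
A usage sketch of initial_dictionary with random data; note that X is laid out as (n_features, n_samples), since the function clusters X.T:

import numpy as np

X = np.random.RandomState(0).randn(20, 500)  # (n_features, n_samples)
D = initial_dictionary(8, X)
print(D.shape)  # (8, 20): unit-norm atoms ordered by a 1-D spectral embedding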
Example #36
def compute_manifold_eigenvector(graph: nx.Graph,
                                 num: int,
                                 normalised: bool = False):
    """
    Computes the eigenvectors through the amg solver
    :param graph: graph on which the eigenvectors are computed
    :param num: number of eigenvectors to be computed
    :param normalised: flag defining whether the Laplacian matrix should be normalised or not
    :return: embedding whose columns are the eigenvectors
    """
    embedding = spectral_embedding(
        nx.adjacency_matrix(graph),
        n_components=num,
        eigen_solver='amg',
        random_state=0,  # int(os.environ["random_state_embedding"]),
        eigen_tol=0.0,
        drop_first=False,
        norm_laplacian=normalised)
    return embedding
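
A usage sketch of compute_manifold_eigenvector (the karate-club graph is illustrative; the 'amg' solver requires pyamg):

import networkx as nx

G = nx.karate_club_graph()  # small connected graph
vecs = compute_manifold_eigenvector(G, num=4, normalised=True)
print(vecs.shape)  # (34, 4), one eigenvector per column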
Example #37
assign_undirected_weight(W,1,3,0.22)
assign_undirected_weight(W,1,4,0.24)
assign_undirected_weight(W,2,3,0.2)
assign_undirected_weight(W,2,4,0.19)
assign_undirected_weight(W,3,4,1)

D = np.zeros((n,n))
for i in V:
    D[i,i] = np.sum(W[i,:])
D[D == 0] = 1e-8  # don't laugh, there is a core R package that actually does this

print(W)
print(D)

D_hat = D**(-0.5)
L = D_hat * W * D_hat
print(L)
print("==================")
#labels = spectral_clustering(A, n_clusters = 2)
random_state = check_random_state(None)
maps = spectral_embedding(L, n_components = 2, 
    eigen_solver = None, 
    random_state = random_state,
    eigen_tol = 0.0,
    drop_first= False)
print(maps)

_, labels, _ = k_means(maps,n_clusters=2,random_state=random_state,n_init=10)
print(labels)
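
Note that with plain ndarrays the * above is elementwise, so D_hat * W * D_hat only computes the intended D^(-1/2) W D^(-1/2) when the operands are np.matrix objects. A sketch of the same normalisation with explicit matrix products (reusing W from above):

d = np.asarray(W).sum(axis=1)
d[d == 0] = 1e-8            # same zero-degree guard as above
D_hat = np.diag(d ** -0.5)  # D^(-1/2) as an explicit diagonal matrix
L = D_hat @ W @ D_hat       # normalized affinity via matrix products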

def reproducibility_selection(
    X, grp_mask, niter=2, method='ward', k_range=KRANGE, write_dir='/tmp',
    verbose=True):
    """ Returns a reproducibility metric on bootstraped models
    
    Parameters
    ----------
    X: array of shape (n_voxels, n_contrasts, n_subjects)
       the input data
    grp_mask: array of shape (image_shape),
              the non-zeros elements yield the spatial model
    niter: int, number of bootstrap samples estimated
    method: string, one of 'ward', 'kmeans', 'spectral'
    k_range: list of ints, 
             the possible number of parcels to be tested
    """
    n_voxels, n_contrasts, n_subjects = X.shape
    n_components = 100

    # Define a spatial model
    shape = grp_mask.shape
    connectivity = grid_to_graph(shape[0], shape[1], shape[2], grp_mask).tocsr()
    # concatenate the data spatially
    Xv = np.reshape(X, (n_voxels, n_contrasts * n_subjects))
    # pre-computed stuff
    ic, jc = connectivity.nonzero()
    sigma = np.sum((Xv[ic] - Xv[jc]) ** 2, 1).mean()
    
    maps = []
    for i in range(niter):
        bootstrap = (np.random.rand(Xv.shape[1]) * Xv.shape[1]).astype(int)
        X_ = Xv[:, bootstrap]
        if method == 'spectral':
            connectivity.data = np.exp( 
                - np.sum((X_[ic] - X_[jc]) ** 2, 1) / (2 * sigma))
            maps.append(spectral_embedding(connectivity,
                                           n_components=n_components,
                                           eigen_solver='arpack',
                                           random_state=None,
                                           eigen_tol=0.0, drop_first=False))
        else:
            maps.append(PCA(n_components=n_components).fit_transform(X_))
            
    ars_score = {}
    ami_score = {}
    vm_score = {}
    for (ik, k_) in enumerate(k_range):
        label_ = []
        for i in range(niter):
            bootstrap = (np.random.rand(Xv.shape[1]) * Xv.shape[1]).astype(int)
            if method == 'spectral':
                if k_ <= n_components:
                    for _ in range(10):
                        labels = discretize(maps[i][:, :k_])
                        if len(np.unique(labels)) == k_:
                            break
                else:
                    _, labels, _ = k_means(
                        maps[i], n_clusters=k_, n_init=1,
                        precompute_distances=False, max_iter=10)
            elif method == 'ward':
                ward = Ward(n_clusters=k_,
                            connectivity=connectivity).fit(maps[i])
                labels = ward.labels_
            elif method in ['k-means', 'kmeans']:
                _, labels, _ = k_means(maps[i], n_clusters=k_, n_init=1,
                                       precompute_distances=False, max_iter=10)
            elif method == 'geometric':
                xyz = np.array(np.where(grp_mask)).T
                _, labels, _ = k_means(xyz, n_clusters=k_, n_init=1,
                                       precompute_distances=False, max_iter=10)
            label_.append(labels)
        ars_score[k_] = reproducibility_rating(label_, 'ars')
        ami_score[k_] = reproducibility_rating(label_, 'ami')
        vm_score[k_] = reproducibility_rating(label_, 'vm')
        if verbose:
            print('k: ', k_, '  ari: ', ars_score[k_], 'ami: ', ami_score[k_],
                  ' vm: ', vm_score[k_])
    file = open(path.join(write_dir, 'ari_score_%s.pck' % method), 'wb')
    pickle.dump(ars_score, file)
    file = open(path.join(write_dir, 'ami_score_%s.pck' % method), 'wb')
    pickle.dump(ami_score, file)
    file = open(path.join(write_dir, 'vm_score_%s.pck' % method), 'wb')
    pickle.dump(vm_score, file)
    return ars_score, ami_score, vm_score
def parcel_cv(X, grp_mask, write_dir='/tmp/', method='ward', n_folds=10, 
              k_range=KRANGE, verbose=True):
    """ Functiond edicated to parcel selection using 10-fold cross-validation"""
    from sklearn.cross_validation import KFold, ShuffleSplit
    # Define the structure A of the data. Pixels connected to their neighbors.
    n_voxels, n_contrasts, n_subjects = X.shape
    n_components = 100

    # Define a spatial model
    shape = grp_mask.shape
    connectivity = grid_to_graph(shape[0], shape[1], shape[2], grp_mask).tocsr()
    ic, jc = connectivity.nonzero()

    # concatenate the data spatially
    Xv = np.reshape(X, (n_voxels, n_contrasts * n_subjects))
    sigma = np.sum((Xv[ic] - Xv[jc]) ** 2, 1).mean()

    # pre-compute PCA for the cross_validation loops
    if n_folds == int(n_folds):
        cv = KFold(X.shape[2], n_folds)
    else:
        cv = ShuffleSplit(X.shape[2], 10, .2)
    maps = []
    for (train, test) in cv:
        X_ = np.reshape(X[:, :, train], (n_voxels, n_contrasts * len(train)))
        
        if method == 'spectral':
            connectivity.data = np.exp( 
                - np.sum((X_[ic] - X_[jc]) ** 2, 1) / (2 * sigma))
            maps.append(spectral_embedding(
                    connectivity, n_components=n_components,
                    eigen_solver='arpack', random_state=None,
                    eigen_tol=0.0, drop_first=False))
        else:
            maps.append(PCA(n_components=n_components).fit_transform(X_))

    # parcel selection
    all_crit = {}
    for k in k_range:
        ll, ll_cv = 0, 0
        for (it, (train, test)) in enumerate(cv):
            if method == 'ward':
                ward = Ward(n_clusters=k, 
                            connectivity=connectivity).fit(maps[it])
                labels = ward.labels_
            elif method in ['k-means', 'kmeans']:
                _, labels, _ = k_means(maps[it], n_clusters=k, n_init=1,
                         precompute_distances=False, max_iter=10)
            elif method == 'spectral':
                if k <= n_components:
                    for i in range(10):
                        labels = discretize(maps[it][:, :k])
                        if len(np.unique(labels)) == k:
                            break
                else:
                    _, labels, _ = k_means(
                        maps[it], n_clusters=k, n_init=1,
                        precompute_distances=False, max_iter=10)
            elif method == 'geometric':
                xyz = np.array(np.where(grp_mask)).T
                _, labels, _ = k_means(xyz, n_clusters=k, n_init=1,
                                       precompute_distances=False, max_iter=10)
            for contrast in range(n_contrasts):
                ll1, mu_, sigma1_, sigma2_, bic_ = parameter_map(
                    X[:, contrast, train], labels, null=False)
                ll += ll1.sum()
                ll2 = log_likelihood_map(
                    X[:, contrast, test], labels, mu_, sigma1_, sigma2_)

                ll_cv += ll2.sum()
        all_crit[k] = ll_cv
        if verbose:
            print('k: ', k, 'll: ', ll, ' ll_cv: ', ll_cv)

    file = open(path.join(write_dir, 'll_cv_%s.pck' % method), 'wb')
    pickle.dump(all_crit, file)
    return all_crit
Example #40
def spectral_clustering(affinity, n_clusters=8, n_components=None,
                        eigen_solver=None, random_state=None, n_init=10,
                        k=None, eigen_tol=0.0,
                        assign_labels='kmeans',
                        mode=None):
    """Apply clustering to a projection to the normalized laplacian.

    In practice Spectral Clustering is very useful when the structure of
    the individual clusters is highly non-convex or more generally when
    a measure of the center and spread of the cluster is not a suitable
    description of the complete cluster. For instance when clusters are
    nested circles on the 2D plan.

    If affinity is the adjacency matrix of a graph, this method can be
    used to find normalized graph cuts.

    Parameters
    -----------
    affinity: array-like or sparse matrix, shape: (n_samples, n_samples)
        The affinity matrix describing the relationship of the samples to
        embed. **Must be symmetric**.

        Possible examples:
          - adjacency matrix of a graph,
          - heat kernel of the pairwise distance matrix of the samples,
          - symmetric k-nearest neighbours connectivity matrix of the samples.

    n_clusters: integer, optional
        Number of clusters to extract.

    n_components: integer, optional, default is n_clusters
        Number of eigen vectors to use for the spectral embedding

    eigen_solver: {None, 'arpack' or 'amg'}
        The eigenvalue decomposition strategy to use. AMG requires pyamg
        to be installed. It can be faster on very large, sparse problems,
        but may also lead to instabilities

    random_state: int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization
        of the lobpcg eigen vectors decomposition when eigen_solver == 'amg'
        and by the K-Means initialization.

    n_init: int, optional, default: 10
        Number of time the k-means algorithm will be run with different
        centroid seeds. The final results will be the best output of
        n_init consecutive runs in terms of inertia.

    eigen_tol : float, optional, default: 0.0
        Stopping criterion for eigendecomposition of the Laplacian matrix
        when using arpack eigen_solver.

    assign_labels : {'kmeans', 'discretize'}, default: 'kmeans'
        The strategy to use to assign labels in the embedding
        space.  There are two ways to assign labels after the laplacian
        embedding.  k-means can be applied and is a popular choice. But it can
        also be sensitive to initialization. Discretization is another
        approach which is less sensitive to random initialization. See
        the 'Multiclass spectral clustering' paper referenced below for
        more details on the discretization approach.

    Returns
    -------
    labels: array of integers, shape: n_samples
        The labels of the clusters.

    References
    ----------

    - Normalized cuts and image segmentation, 2000
      Jianbo Shi, Jitendra Malik
      http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324

    - A Tutorial on Spectral Clustering, 2007
      Ulrike von Luxburg
      http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323

    - Multiclass spectral clustering, 2003
      Stella X. Yu, Jianbo Shi
      http://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf

    Notes
    ------
    The graph should contain only one connected component; otherwise
    the results make little sense.

    This algorithm solves the normalized cut for k=2: it is a
    normalized spectral clustering.
    """
    if assign_labels not in ('kmeans', 'discretize'):
        raise ValueError("The 'assign_labels' parameter should be "
                         "'kmeans' or 'discretize', but '%s' was given"
                         % assign_labels)

    if k is not None:
        warnings.warn("'k' was renamed to n_clusters and will "
                      "be removed in 0.15.",
                      DeprecationWarning)
        n_clusters = k
    if mode is not None:
        warnings.warn("'mode' was renamed to eigen_solver "
                      "and will be removed in 0.15.",
                      DeprecationWarning)
        eigen_solver = mode

    random_state = check_random_state(random_state)
    n_components = n_clusters if n_components is None else n_components
    maps = spectral_embedding(affinity, n_components=n_components,
                              eigen_solver=eigen_solver,
                              random_state=random_state,
                              eigen_tol=eigen_tol, drop_first=False)

    if assign_labels == 'kmeans':
        _, labels, _ = k_means(maps, n_clusters, random_state=random_state,
                               n_init=n_init)
    else:
        labels = discretize(maps, random_state=random_state)

    return labels, maps
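
A minimal usage sketch of the function above on two concentric rings, where k-means on raw coordinates would fail (make_circles and the rbf affinity are illustrative choices, not from the original):

from sklearn.datasets import make_circles
from sklearn.metrics.pairwise import rbf_kernel

X, _ = make_circles(n_samples=200, factor=0.5, noise=0.05, random_state=0)
affinity = rbf_kernel(X, gamma=20)  # symmetric affinity matrix
labels, maps = spectral_clustering(affinity, n_clusters=2, random_state=0)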
Example #41
def drc(X, k, Gamma=0.5, Const=1.0):	# X = data (n,d), k = num of clusters, gamma = 1/sigma^2
	n = X.shape[0]
	d = X.shape[1]


	A = np.eye(d)
	H = np.eye(n) - (1.0/n)*np.ones((n,n))
	U_converged = False
	delta = 0.001
	escape_count = 20
	output = {}
	
	
	##	Calculate initial U
#	[output['init_allocation'], U] = spectral_clustering(X,k, 3)
#	print output['init_allocation']
#	output['allocation'] =  output['init_allocation']

	C = sklearn.metrics.pairwise.rbf_kernel(X, gamma=Gamma)
#	import pdb; pdb.set_trace()
#
	U = spectral_embedding(C, n_components=k)
	clf = KMeans(n_clusters=k)
	output['init_allocation'] = clf.fit_predict(U)
	allocation = output['init_allocation']  # avoid a NameError if convergence occurs on the first pass
		
	while not U_converged:
		for rep in range(12):
			part_1 = np.linalg.inv(A + delta*np.eye(d))
			part_2 = X.T.dot(H).dot(U).dot(U.T).dot(H).dot(X)
			n_1 = np.linalg.norm(part_1,'fro');
			n_2 = np.linalg.norm(part_2,'fro');
			lmbda = n_1/n_2;
			#lmbda = 1;
				
			for count in range(escape_count):
				FI = part_1 - lmbda*Const*np.power(1.1,count+1)*part_2
				#FI = lmbda*Const*np.power(1.1,count+1)*part_2 - part_1
				#print '\t\tpart 1 size : ', str(np.linalg.norm(part_1))
				#print '\t\tpart 2 size : ', str(np.linalg.norm(lmbda*np.power(1.1,count+1)*part_2))
		
				V,D = eig_sorted(FI)
				reduced_dim = np.sum(D < 0)


				if(reduced_dim < 1):
					count += 1
				else:
					break;

			if count == escape_count:
				print('Error : Your Const is too small, try a larger value.')
				exit()


			L = V[:,-reduced_dim:]
			new_A = L.dot(L.T)	


			if(np.linalg.norm(new_A - A) < 0.001*np.linalg.norm(A)): break;
			else: A = new_A
		
	
		embed_dim = k
		if(reduced_dim < k): embed_dim = reduced_dim
	
		C = sklearn.metrics.pairwise.rbf_kernel(X.dot(L), gamma=Gamma)
		U_new = spectral_embedding(C, n_components=embed_dim)
	

		U_diff = np.linalg.norm(U_new[:,0:embed_dim] - U[:,0:embed_dim])

		print(U_diff)
		if(U_diff < 0.001*np.linalg.norm(U)): 
			U_converged = True
			output['allocation'] = allocation
			output['L'] = L

		print(L.shape)

		U = U_new[:,0:k]
		clf = KMeans(n_clusters=k)
		allocation = clf.fit_predict(U)
	

	return output
Example #42
            mdata=np.vstack((mdata,data[i]))

for i in lnames:
    print(i)
    
cmatrix=correlationMatrix(mdata,0,400000,10)

corrmin=0.1
for i in range(cmatrix.shape[0]):
    for j in range(cmatrix.shape[1]):
        if cmatrix[i,j]<0:
            #cmatrix[i,j]=-cmatrix[i,j]
            cmatrix[i,j]=0
            
            
Pr=spectral_embedding(cmatrix,n_components=3)

labels = spectral_clustering(cmatrix, n_clusters=6, eigen_solver='arpack',assign_labels='discretize')
#clcen,labels=affinity_propagation(cmatrix,damping=0.5)


cm_bright = ListedColormap(['#000000','#FF0000','#00FF00','#0000FF','#FF00FF'
                            ,'#FFFF00','#00FFFF','#9999FF','#FF9999','#99FF99'])
#print Pr            

fig = plt.figure()

ax = fig.add_subplot(1,1,1, projection='3d')

plt.scatter(Pr[:,0],Pr[:,1],zs=Pr[:,2],c=labels,cmap=cm_bright,s=25,marker='o')
def parcel_selection(X, grp_mask, write_dir='/tmp/', method='ward',
                     k_range=KRANGE, criterion='ll', verbose=True):
    """ Functiond edicated to parcel selection """
    # Define the structure A of the data. Pixels connected to their neighbors.
    n_voxels, n_contrasts, n_subjects = X.shape
    n_components = 100

    # Define a spatial model
    shape = grp_mask.shape
    connectivity = grid_to_graph(shape[0], shape[1], shape[2], grp_mask).tocsr()

    # concatenate the data spatially
    Xv = np.reshape(X, (n_voxels, n_contrasts * n_subjects))
    X_ = PCA(n_components=n_components).fit_transform(Xv)

    if method == 'spectral':
        i, j = connectivity.nonzero()
        sigma = np.sum((Xv[i] - Xv[j]) ** 2, 1).mean()
        connectivity.data = np.exp(- np.sum((Xv[i] - Xv[j]) ** 2, 1) /
                                   (2 * sigma))
        
        maps = spectral_embedding(connectivity, n_components=n_components,
                              eigen_solver='arpack',
                              random_state=None,
                              eigen_tol=0.0, drop_first=False)
        
    del Xv
   
    # parcel selection
    all_bic = {}
    all_crit = {}
    for k in k_range:
        if method == 'ward':
            ward = Ward(n_clusters=k, 
                        connectivity=connectivity).fit(X_)
            labels = ward.labels_
        elif method == 'spectral':
            if k <= n_components:
                for i in range(10):
                    labels = discretize(maps[:, :k])
                    if len(np.unique(labels)) == k:
                        break
            else:
                _, labels, _ = k_means(maps[:, :100], n_clusters=k, n_init=1,
                         precompute_distances=False, max_iter=10)
        elif method == 'geometric':
            xyz = np.array(np.where(grp_mask)).T
            _, labels, _ = k_means(xyz, n_clusters=k, n_init=1,
                                   precompute_distances=False, max_iter=10)
        elif method in ['k-means', 'kmeans']:                
            _, labels, _ = k_means(X_, n_clusters=k, n_init=1,
                                   precompute_distances=False, max_iter=10)
        elif method == 'gmm':
            from sklearn.mixture import GMM
            labels = GMM(n_components=k, covariance_type='spherical', n_iter=10,
                      n_init=1).fit(X_).predict(X_)
            
        ll, bic = 0, 0
        for contrast in range(n_contrasts):
            ll1, mu_, sigma1_, sigma2_, bic_ = parameter_map(
                X[:, contrast], labels, null=False)
            bic += bic_.sum()
            if criterion == 'log-LR':
                ll2, _, _, _, bic_ = parameter_map(
                    X[:, contrast], labels, null=True)
                ll += np.sum((ll1 - ll2))
            elif criterion == 'll':
                ll += np.sum(ll1)
            elif criterion == 'sigma':
                ll = (sigma1_.mean(), sigma2_.mean())
            elif criterion == 'kfold':
                ll += score_spatial_model(X[:, contrast], labels, cv='kfold')
        all_crit[k] = ll
        all_bic[k] = bic
        if verbose:
            print('k: ', k, ' bic: ', bic, ' crit: ', ll)
    if criterion == 'log-LR':
        file = open(path.join(write_dir, 'all_llr_%s.pck' % method), 'wb')
        pickle.dump(all_crit, file)
    elif criterion == 'll':
        file = open(path.join(write_dir, 'all_ll_%s.pck' % method), 'wb')
        pickle.dump(all_crit, file)
    elif criterion == 'sigma':
        file = open(path.join(write_dir, 'all_sigma_%s.pck' % method), 'wb')
        pickle.dump(all_crit, file)
    elif criterion == 'kfold':
        file = open(path.join(write_dir, 'all_kfold_%s.pck' % method), 'wb')
        pickle.dump(all_crit, file)
    file = open(path.join(write_dir, 'all_bic_%s.pck' % method), 'wb')
    pickle.dump(all_bic, file)
    return all_crit, all_bic
Example #44
import gzip
import cPickle
import numpy

from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.manifold import spectral_embedding
from sklearn.neighbors import kneighbors_graph

f = gzip.open('mnist.pkl.gz', 'rb')
train_set, valid_set, test_set = cPickle.load(f)
f.close()

# perform SC on the test set
data_x, data_y = test_set

k = 12
nClass = 500
A = kneighbors_graph(data_x, k)
V = spectral_embedding(A, n_components = 10, drop_first = False)
V = V + numpy.absolute(numpy.min(V))
#V = V/numpy.amax(V)
#
km_model = KMeans(n_clusters = nClass)
ypred = km_model.fit_predict(V)
nmi = metrics.normalized_mutual_info_score(data_y, ypred)
print('The NMI is: %.4f'%nmi)
#
V = numpy.float32(V)

f = gzip.open('EVD-test500.pkl.gz', 'wb')
cPickle.dump([(V, data_y), 0, 0], f, protocol = 2)
f.close()
#sio.savemat('V_train_10.mat', {'train_x': V, 'train_y': data_y})
Example #45
def cloudstering(dendrogram, catalog, criteria, user_k, user_ams, user_scalpars, user_iter, 
    save_isol_leaves, save_clust_leaves, save_branches, blind, rms, s2nlim, locscal):

    """
    SCIMES main function. It collects parents/children
    of all structrures within the dendrogram, and their
    properties. It calls the affinity matrix-related
    functions (for creation, rescaling, cluster counting),
    and it runs several time the actual spectral clustering
    routine by calculating every time the silhouette of the
    current configuration. Input parameter are passed by the
    SpectralCloudstering class.
    
    Parameters
    -----------

    dendrogram: 'astrodendro.dendrogram.Dendrogram' instance
        The dendrogram to clusterize.

    catalog: 'astropy.table.table.Table' instance
        A catalog containing all properties of the dendrogram
        structures. Generally generated with ppv_catalog module.

    header: 'astropy.io.fits.header.Header' instance
        The header of the fits data the dendrogram was 
        generated from. Necessary to obtain the assignment cubes.

    criteria: list of strings
        Clustering criteria referred to the structure properties
        in the catalog (default ['volume', 'luminosity']).

    user_k: int
        The expected number of clusters, if not provided
        it will be guessed automatically through the eigenvalues
        of the unsmoothed affinity matrix.

    user_ams: numpy array
        User-provided affinity matrix. If this is not
        furnished, it is automatically generated through the
        volume and/or luminosity criteria.

    user_scalpars: list of floats
        User-provided scaling parameters to smooth the
        affinity matrices.

    user_iter: int
        User-provided number of k-means iterations.
    
    save_isol_leaves: bool
        Consider the isolated leaves (without parent) 
        as individual 'clusters'. Useful for low
        resolution data where the beam size
        corresponds to the size of a Giant
        Molecular Cloud.

    save_clust_leaves: bool
        Consider unclustered leaves as individual
        'clusters'. This keyword will not include
        the isolated leaves without parents.

    save_all_leaves: bool
        Trigger both save_isol_leaves and
        save_clust_leaves.

    save_branches: bool
        Retain all isolated branches usually discarded
        by the cluster analysis.

    save_all: bool
        Trigger all save_isol_leaves, 
        save_clust_leaves, and save_branches.        
    
    rms: int or float
        Noise level of the observation. Necessary to
        calculate the scaling parameter above a certain
        signal-to-noise ratio.

    s2nlim: int or float
        Signal-to-noise limit above which the
        scaling parameter is calculated. Needed
        only if rms is not np.nan.

    blind: bool
        If True, do not show the affinity matrices.
        Matplotlib required otherwise.

    locscal: bool
        Smooth the affinity matrices using a local
        scaling technique.


    Returns
    -------

    clusts: list
        The dendrogram branch indexes corresponding to the
        identified clusters

    catalog: 'astropy.table.table.Table' instance
        The input catalog updated with dendrogram structure
        parent, ancestor, number of leaves, and type 
        ('T', trunks or branches without parent; 'B', branches
        with parent; 'L', leaves). 

    AMs: numpy array
        The affinity matrices calculated by the algorithm
    
    escalpars: list
        Estimated scaling parameters for the different
        affinity matrices
    
    silhouette: float
        Silhouette of the best cluster configuration

    """

    # Collecting all connectivity and other information into more handy lists
    all_structures_idx = np.arange(len(catalog[criteria[0]].data), dtype='int')

    all_levels = []
    brc_levels = []

    all_leav_names = []
    all_leav_idx = []

    all_brc_names = []
    all_brc_idx = []

    all_parents = []
    all_children = []

    all_struct_names = []
    all_ancestors = []

    all_struct_ancestors = []
    all_struct_parents = []
    all_struct_types = []
    nleaves = []

    trunk_brs_idx = []
    two_clust_idx = []    
    mul_leav_idx = []

    s2ns = []

    for structure_idx in all_structures_idx:

        s = dendrogram[structure_idx]
        all_levels.append(s.level)
        
        s2ns.append(dendrogram[structure_idx].height/rms)

        all_struct_names.append(str(s.idx))
        all_struct_ancestors.append(s.ancestor.idx)
        if s.parent:
            all_struct_parents.append(s.parent.idx)
        else:
            all_struct_parents.append(-1)
        nleaves.append(len(s.sorted_leaves()))

        ancestors = []
        anc = s.parent
        while anc != None:

            ancestors.append(anc.idx)
            anc = anc.parent

        ancestors.append(s.idx)
        all_ancestors.append(ancestors)

        # If structure is a leaf find all the parents
        if s.is_leaf and s.parent != None:

            par = s.parent
            all_leav_names.append(str(s.idx))

            parents = []
            
            while par != None:

                parents.append(par.idx)
                par = par.parent
                
            parents.append(len(catalog[criteria[0]].data)) # This is the trunk!
            all_parents.append(parents)
            
        # If structure is a branch find all its leaves
        if s.is_branch:

            brc_levels.append(s.level)
            all_brc_idx.append(s.idx)
            all_brc_names.append(str(s.idx))
            
            children = []
            
            for leaf in s.sorted_leaves():
                children.append(leaf.idx)
                
            all_children.append(children)

            # Trunk branches
            if s.parent == None:

                trunk_brs_idx.append(s.idx)
                all_leav_idx = all_leav_idx + children

                if s.children[0].is_branch or s.children[1].is_branch:
                    mul_leav_idx = mul_leav_idx + children
                else:
                    two_clust_idx.append(s.idx)

                all_struct_types.append('T')

            else:

                all_struct_types.append('B')
        
        else:

            all_struct_types.append('L')


    two_clust_idx = np.unique(two_clust_idx).tolist()
    
    dict_parents = dict(zip(all_leav_names,all_parents))            
    dict_children = dict(zip(all_brc_names,all_children))    
    dict_ancestors = dict(zip(all_struct_names,all_ancestors))

    all_levels.append(-1)
    all_levels = np.asarray(all_levels)

    # Retrieving needed properties from the catalog
    # and adding fake "trunk" properties
    props = []
    for crit in criteria:
        prop = catalog[crit].data.tolist()
        tprop = sum(catalog[crit].data[trunk_brs_idx])
        prop.append(tprop)
        props.append(prop)
    
    s2ns.append(1)
    props.append(s2ns)


    # Generating affinity matrices if not provided
    if user_ams is None:

        AMs = aff_matrix(len(all_leav_idx), len(catalog[criteria[0]].data), \
            all_leav_idx, all_brc_idx, brc_levels, dict_children, props)

        if not blind:

            # Showing all affinity matrices
            for i, crit in enumerate(criteria):

                plt.matshow(AMs[i,:,:])
                plt.title('"'+crit+'" affinity matrix', fontsize = 'medium')
                plt.xlabel('leaf index')
                plt.ylabel('leaf index')    
                plt.colorbar()
        
    else:

        AMs = user_ams


    S2Nmat = AMs[-1,:,:]
    AMs = AMs[:-1,:,:]

    # Check if the affinity matrix has more than 2 elements
    # otherwise return everything as clusters ("save_all").
    if AMs.shape[1] <= 2:

        print("--- Not necessary to cluster. 'save_all' keyword triggered")

        all_leaves = []
        for leaf in dendrogram.leaves:
            all_leaves.append(leaf.idx)

        clusts = all_leaves

        return clusts, AMs
        
                
    # Check whether the affinity matrix scaling parameters
    # are provided by the user; if so use them, otherwise
    # calculate them

    """
    scpars = np.zeros(len(criteria))
    
    if user_scalpars is not None:
        print("- Using user-provided scaling parameters")
        user_scalpars = np.asarray(user_scalpars)
        scpars[0:len(user_scalpars)] = user_scalpars
    """
       
    scpars = np.array(user_scalpars)         

    print("- Start spectral clustering")

    # Selecting the criteria and merging the matrices    
    escalpars = []
    AM = np.ones(AMs[0,:,:].shape)
    for i, crit in enumerate(criteria):

        print("-- Rescaling %s matrix" % crit)
        AMc, sigma = mat_smooth(AMs[i,:,:], S2Nmat, s2nlim = s2nlim, 
            scalpar = scpars[i], lscal = locscal)        
        AM = AM*AMc
        escalpars.append(sigma)
            
    
    # Making the reduced affinity matrices
    mul_leav_mat = []
    for mli in mul_leav_idx:
        mul_leav_mat.append(all_leav_idx.index(mli))

    mul_leav_mat = np.asarray(mul_leav_mat)
    rAM = AM[mul_leav_mat,:]
    rAM = rAM[:,mul_leav_mat]

    if not blind:
            
        # Showing the final affinity matrix
        plt.matshow(AM)
        plt.colorbar()
        plt.title('Final Affinity Matrix')
        plt.xlabel('leaf index')
        plt.ylabel('leaf index')

      
    # Guessing the number of clusters
    # if not provided

    if user_k == 0:   
        kg = guessk(rAM)
    else:
        kg = user_k-len(two_clust_idx)

    print("-- Guessed number of clusters = %i" % (kg+len(two_clust_idx)))
    
    if kg > 1:

        print("-- Number of k-means iteration: %i" % user_iter)

        # Find the best cluster number
        sils = []

        min_ks = max(2,kg-15)
        max_ks = min(kg+15,rAM.shape[0]-1)
                
        clust_configs = []

        for ks in range(min_ks,max_ks):

            try:
                
                evecs = spectral_embedding(rAM, n_components=ks,
                                        eigen_solver='arpack',
                                        random_state=222,
                                        eigen_tol=0.0, drop_first=False)
                _, all_clusters, _ = k_means(evecs, ks, random_state=222, n_init=user_iter)
                
                sil = silhouette_score(evecs, np.asarray(all_clusters), metric='euclidean')

                clust_configs.append(all_clusters)

            except np.linalg.LinAlgError:

                sil = 0
                
            sils.append(sil)
                    
        # Use the best cluster number to generate clusters                    
        best_ks = sils.index(max(sils))+min_ks
        print("-- Best cluster number found through SILHOUETTE (%f)= %i" % (max(sils), best_ks+len(two_clust_idx)))        
        silhouette = max(sils)
        
        all_clusters = clust_configs[np.argmax(sils)]
                        
    else:

        print("-- Not necessary to cluster")
        all_clusters = np.zeros(len(mul_leav_idx), dtype=np.int32)

    clust_branches = clust_cleaning(mul_leav_idx, all_clusters, dict_parents, dict_children, dict_ancestors, savebranches = save_branches)
    clusts = clust_branches + two_clust_idx

    print("-- Final cluster number (after cleaning) %i" % len(clusts))
    

    # Calculate the silhouette after cluster cleaning
    #fclusts_idx = np.ones(len(mul_leav_idx))
    fclusts_idx = -1*all_clusters

    i = 1
    for clust in clusts:
        i += 1
        fleavs = dendrogram[clust].sorted_leaves()

        fleavs_idx = []
        for fleav in fleavs:
            fleavs_idx.append(fleav.idx)

        fleavs_idx = np.asarray(fleavs_idx)

        # Find the position of the cluster leaves
        pos = np.where(np.in1d(mul_leav_idx,fleavs_idx))[0]
        fclusts_idx[pos] = i

    oldclusts = np.unique(fclusts_idx[fclusts_idx < 0])

    for oldclust in oldclusts:
        fclusts_idx[fclusts_idx == oldclust] = np.max(fclusts_idx)+1

    evecs = spectral_embedding(rAM, n_components=best_ks,
                            eigen_solver='arpack',
                            random_state=222,
                            eigen_tol=0.0, drop_first=False)
    sil = silhouette_score(evecs, fclusts_idx, metric='euclidean')

    print("-- Final clustering configuration silhoutte %f" % sil)


    all_struct_types = np.asarray(all_struct_types)
    all_struct_parents = np.asarray(all_struct_parents)

    # Add the isolated leaves to the cluster list, if required
    if save_isol_leaves:

        isol_leaves = all_structures_idx[(all_struct_parents == -1) & (all_struct_types == 'L')]
        clusts = clusts + list(isol_leaves)

        print("SAVE_ISOL_LEAVES triggered. Isolated leaves added.") 
        print("-- Total cluster number %i" % len(clusts))


    # Add the unclustered leaves within clusters to the cluster list, if required
    if save_clust_leaves:

        isol_leaves = all_structures_idx[(all_struct_parents == -1) & (all_struct_types == 'L')]

        all_leaves = []
        for leaf in dendrogram.leaves:
            all_leaves.append(leaf.idx)

        clust_leaves = []
        for clust in clusts:
            for leaf in dendrogram[clust].sorted_leaves():
                clust_leaves.append(leaf.idx)

        unclust_leaves = list(set(all_leaves)-set(clust_leaves + list(isol_leaves)))
        clusts = clusts + unclust_leaves

        print("SAVE_CLUST_LEAVES triggered. Unclustered leaves added.")
        print("-- Total cluster number %i" % len(clusts))
    

    # Update the catalog with new information
    catalog['parent'] = all_struct_parents
    catalog['ancestor'] = all_struct_ancestors
    catalog['n_leaves'] = nleaves
    catalog['structure_type'] = all_struct_types

    return clusts, catalog, AMs, escalpars, silhouette
import os

import cv2
import numpy as np
from PIL import Image, ImageOps
from sklearn import manifold

images = [ Image.open(os.path.join("images", name)) for name in image_names ]
hist = []
for name in image_names:
	hists = []
	for i in range(3):
		hists.append( cv2.calcHist( [cv2.imread(os.path.join("images", name)).astype('float32') ], [i], None, [20], [0, 256] ) )
	hist.append(hists)

blocks = {}

similarity = np.empty([len(image_names), len(image_names)])

print "Calculating similarities..."
for i, image1 in enumerate(images):
	for j, image2 in enumerate(images):
		similarity[i, j] = sum( abs( cv2.compareHist(hist[i][k], hist[j][k], cv2.HISTCMP_CORREL) ) for k in range(3) )

positions = manifold.spectral_embedding(similarity, 1)
print(positions)

THUMB_WIDTH = 100
THUMB_HEIGHT = 100

sorted_images = sorted(zip(positions, images))
thumbnails = [ImageOps.fit(im, (THUMB_WIDTH, THUMB_HEIGHT)) for pos, im in sorted_images]
collage = Image.new('RGB', (THUMB_WIDTH * len(image_names), THUMB_HEIGHT))

for i, im in enumerate(thumbnails):
	collage.paste(im, (i*THUMB_WIDTH, 0))

collage.show()