Example #1
0
    def fit(self, biadjacency: sparse.csr_matrix):
        """Embedding of bipartite graphs from the clustering obtained with Louvain.

        The graph is clustered with :class:`BiLouvain`. By default the
        embeddings are the membership matrices returned by that clustering.
        When ``self.isolated_nodes`` is ``'remove'`` or ``'merge'``, column
        clusters containing a single column are relabelled (``-1`` for
        ``'remove'``, one shared extra label for ``'merge'``), the kept
        clusters are renumbered ``0..k-1`` and both embeddings are recomputed
        as ``normalize(...)`` of the (transposed) biadjacency matrix times the
        membership matrix of the relabelled clusters.

        ``self.labels_`` always holds the labels returned by the clustering,
        not the relabelled ones.

        Parameters
        ----------
        biadjacency:
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiLouvainEmbedding`
        """
        bilouvain = BiLouvain(resolution=self.resolution,
                              modularity=self.modularity,
                              tol_optimization=self.tol_optimization,
                              tol_aggregation=self.tol_aggregation,
                              n_aggregations=self.n_aggregations,
                              shuffle_nodes=self.shuffle_nodes,
                              sort_clusters=False,
                              return_membership=True,
                              return_aggregate=True,
                              random_state=self.random_state)
        bilouvain.fit(biadjacency)

        self.labels_ = bilouvain.labels_

        embedding_row = bilouvain.membership_row_
        embedding_col = bilouvain.membership_col_

        if self.isolated_nodes in ['remove', 'merge']:
            # remove or merge isolated column nodes and reindex labels
            labels_col_unique, counts = np.unique(bilouvain.labels_col_,
                                                  return_counts=True)
            labels_old = labels_col_unique[counts > 1]
            n_kept = len(labels_old)
            n_labels_col = np.max(labels_col_unique) + 1
            if self.isolated_nodes == 'remove':
                labels_new = -np.ones(n_labels_col, dtype='int')
            else:
                labels_new = n_kept * np.ones(n_labels_col, dtype='int')
            labels_new[labels_old] = np.arange(n_kept)
            labels_col = labels_new[bilouvain.labels_col_]

            # reindex row labels accordingly
            # The lookup table is indexed both by the kept column-cluster ids
            # and by the row labels, so it must be large enough for both.
            # Sizing it from the row labels alone raises IndexError as soon as
            # a kept cluster id exceeds the largest row label.
            n_labels_row = max(np.max(bilouvain.labels_row_),
                               np.max(labels_col_unique)) + 1
            labels_new = -np.ones(n_labels_row, dtype='int')
            labels_new[labels_old] = np.arange(n_kept)
            labels_row = labels_new[bilouvain.labels_row_]

            # get embeddings
            probs = normalize(biadjacency)
            embedding_row = probs.dot(membership_matrix(labels_col))
            probs = normalize(biadjacency.T)
            embedding_col = probs.dot(membership_matrix(labels_row))

        self.embedding_row_ = embedding_row.toarray()
        self.embedding_col_ = embedding_col.toarray()
        self.embedding_ = self.embedding_row_

        return self
    def fit(self,
            input_matrix: sparse.csr_matrix,
            force_bipartite: bool = False):
        """Embedding of graphs from the clustering obtained with Louvain.

        The graph is clustered with :class:`Louvain`, the labels are passed
        through ``reindex_labels`` together with ``self.isolated_nodes``, and
        the embedding is ``normalize(input_matrix)`` times the membership
        matrix of the resulting labels. For a bipartite graph the column
        embedding is computed the same way from the transposed matrix and the
        row labels.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`BiLouvainEmbedding`
        """
        louvain = Louvain(resolution=self.resolution,
                          modularity=self.modularity,
                          tol_optimization=self.tol_optimization,
                          tol_aggregation=self.tol_aggregation,
                          n_aggregations=self.n_aggregations,
                          shuffle_nodes=self.shuffle_nodes,
                          sort_clusters=False,
                          return_membership=True,
                          return_aggregate=True,
                          random_state=self.random_state)
        louvain.fit(input_matrix, force_bipartite=force_bipartite)

        # isolated nodes
        # A square matrix is still bipartite when the caller forces it; testing
        # the shape alone would embed it with ``labels_`` and skip the
        # row/column embeddings.
        bipartite = force_bipartite or not is_square(input_matrix)
        if bipartite:
            labels = louvain.labels_col_
            labels_secondary = louvain.labels_row_
        else:
            labels = louvain.labels_
            labels_secondary = None

        self.labels_, labels_row = reindex_labels(labels, labels_secondary,
                                                  self.isolated_nodes)

        # embedding
        probs = normalize(input_matrix)
        embedding_ = probs.dot(membership_matrix(self.labels_))
        self.embedding_ = embedding_.toarray()

        if labels_row is not None:
            probs = normalize(input_matrix.T)
            embedding_col = probs.dot(membership_matrix(labels_row))
            self.embedding_row_ = self.embedding_
            self.embedding_col_ = embedding_col.toarray()

        return self
Example #3
0
    def _secondary_outputs(self, biadjacency):
        """Derive the optional outputs from ``labels_row_`` and ``labels_col_``.

        Depending on ``return_membership`` and ``return_aggregate``, sets
        ``membership_row_`` / ``membership_col_`` and ``biadjacency_``.
        Returns ``self``.
        """
        if not (self.return_membership or self.return_aggregate):
            return self

        # Cluster indicator matrices of the rows and of the columns.
        row_clusters = membership_matrix(self.labels_row_)
        col_clusters = membership_matrix(self.labels_col_)

        if self.return_membership:
            self.membership_row_ = normalize(biadjacency.dot(col_clusters))
            self.membership_col_ = normalize(biadjacency.T.dot(row_clusters))

        if self.return_aggregate:
            # Biadjacency matrix between row clusters and column clusters.
            aggregated = sparse.csr_matrix(row_clusters.T.dot(biadjacency))
            self.biadjacency_ = aggregated.dot(col_clusters)

        return self
Example #4
0
    def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
        """Embed new rows given their adjacency vectors.

        The vectors are validated, normalized with ``normalize`` and multiplied
        by the membership matrix of ``self.labels_``.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency row vectors.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.
        """
        self._check_fitted()

        # The expected vector length is the number of columns when a column
        # embedding exists, the number of nodes otherwise.
        reference = self.embedding_ if self.embedding_col_ is None else self.embedding_col_
        n = len(reference)

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
        check_nonnegative(adjacency_vectors)

        clusters = membership_matrix(self.labels_)
        weights = normalize(adjacency_vectors)
        return weights.dot(clusters)
Example #5
0
    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Embed new nodes given their adjacency vectors.

        The vectors are validated and multiplied, without normalization, by the
        membership matrix of ``self.labels_``.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.
        """
        self._check_fitted()

        # Vectors must have one entry per row of the fitted embedding.
        n_expected = self.embedding_.shape[0]
        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n_expected)
        check_nonnegative(adjacency_vectors)

        clusters = membership_matrix(self.labels_)
        return adjacency_vectors.dot(clusters)
Example #6
0
 def _secondary_outputs(self, adjacency):
     """Derive the optional outputs from ``labels_``.

     Sets ``membership_`` when ``return_membership`` is true and
     ``adjacency_`` (aggregate graph between clusters) when
     ``return_aggregate`` is true. Returns ``self``.
     """
     if not (self.return_membership or self.return_aggregate):
         return self

     clusters = membership_matrix(self.labels_)
     if self.return_membership:
         self.membership_ = normalize(adjacency.dot(clusters))
     if self.return_aggregate:
         aggregated = clusters.T.dot(adjacency.dot(clusters))
         self.adjacency_ = sparse.csr_matrix(aggregated)
     return self
Example #7
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict] = None) \
            -> 'Propagation':
        """Node classification by label propagation.

        Seed labels are fixed; the remaining nodes are repeatedly relabelled
        by ``vote_update`` until their labels stop changing or ``self.n_iter``
        passes have been made. The order in which the remaining nodes are
        visited is controlled by ``self.node_order`` (``'random'``,
        ``'decreasing'`` or ``'increasing'``; any other value keeps the order
        returned by ``_instanciate_vars``).

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency = check_format(adjacency)
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instanciate_vars(
            adjacency, seeds)

        if self.node_order == 'random':
            np.random.shuffle(index_remain)
        elif self.node_order in ('decreasing', 'increasing'):
            weights = adjacency.T.dot(np.ones(n))
            if self.node_order == 'decreasing':
                weights = -weights
            index = np.argsort(weights).astype(np.int32)
            # Keep only the non-seed nodes, in sorted order. Indexing the
            # permutation with ``index_remain`` would select nodes by rank
            # instead, which can include seeds and drop unlabeled nodes.
            index_remain = index[np.isin(index, index_remain)]

        labels = -np.ones(n, dtype=np.int32)
        labels[index_seed] = labels_seed
        labels_remain = np.zeros_like(index_remain, dtype=np.int32)

        indptr = adjacency.indptr.astype(np.int32)
        indices = adjacency.indices.astype(np.int32)
        if self.weighted:
            data = adjacency.data.astype(np.float32)
        else:
            # One unit weight per stored edge, aligned with ``indices`` exactly
            # like ``adjacency.data`` in the weighted case (an array of length
            # ``n`` is too short whenever the graph has more edges than nodes).
            data = np.ones(indices.shape[0], dtype=np.float32)

        t = 0
        while t < self.n_iter and not np.array_equal(labels_remain,
                                                     labels[index_remain]):
            t += 1
            # Snapshot used by the loop condition to detect convergence.
            labels_remain = labels[index_remain].copy()
            labels = np.asarray(
                vote_update(indptr, indices, data, labels, index_remain))

        membership = membership_matrix(labels)
        membership = normalize(adjacency.dot(membership))

        self.labels_ = labels
        self.membership_ = membership

        return self
Example #8
0
 def _secondary_outputs(self, adjacency):
     """Derive the optional outputs from ``labels_``.

     Boolean adjacency data is cast to float first. Sets ``membership_``
     when ``return_membership`` is true and ``adjacency_`` (aggregate graph
     between clusters) when ``return_aggregate`` is true. Returns ``self``.
     """
     if not (self.return_membership or self.return_aggregate):
         return self

     if np.issubdtype(adjacency.data.dtype, np.bool_):
         adjacency = adjacency.astype(float)

     clusters = membership_matrix(self.labels_)
     if self.return_membership:
         self.membership_ = normalize(adjacency.dot(clusters))
     if self.return_aggregate:
         aggregated = clusters.T.dot(adjacency.dot(clusters))
         self.adjacency_ = sparse.csr_matrix(aggregated)
     return self
Example #9
0
    def _secondary_outputs(self, input_matrix: sparse.csr_matrix):
        """Derive the optional outputs from the fitted labels.

        Boolean data is cast to float first. For a non-bipartite graph, sets
        ``membership_`` and ``aggregate_`` from ``labels_``. For a bipartite
        graph, sets ``membership_row_``, ``membership_col_``, ``membership_``
        and ``aggregate_`` from the row and column labels; when
        ``labels_col_`` is ``None`` the column memberships are obtained by
        normalizing the transposed matrix times the row memberships.
        Returns ``self``.
        """
        if not (self.return_membership or self.return_aggregate):
            return self

        if np.issubdtype(input_matrix.data.dtype, np.bool_):
            input_matrix = input_matrix.astype(float)

        if not self.bipartite:
            clusters = membership_matrix(self.labels_)
            if self.return_membership:
                self.membership_ = normalize(input_matrix.dot(clusters))
            if self.return_aggregate:
                aggregated = clusters.T.dot(input_matrix.dot(clusters))
                self.aggregate_ = sparse.csr_matrix(aggregated)
            return self

        # Bipartite graph: build row and column cluster matrices that share
        # the same number of labels.
        if self.labels_col_ is None:
            n_labels = max(self.labels_) + 1
            row_clusters = membership_matrix(self.labels_, n_labels=n_labels)
            col_clusters = normalize(input_matrix.T.dot(row_clusters))
        else:
            n_labels = max(max(self.labels_row_), max(self.labels_col_)) + 1
            row_clusters = membership_matrix(self.labels_row_,
                                             n_labels=n_labels)
            col_clusters = membership_matrix(self.labels_col_,
                                             n_labels=n_labels)

        if self.return_membership:
            self.membership_row_ = normalize(input_matrix.dot(col_clusters))
            self.membership_col_ = normalize(input_matrix.T.dot(row_clusters))
            self.membership_ = self.membership_row_

        if self.return_aggregate:
            aggregated = sparse.csr_matrix(row_clusters.T.dot(input_matrix))
            self.aggregate_ = aggregated.dot(col_clusters)

        return self
Example #10
0
    def _secondary_outputs(self, biadjacency):
        """Derive the optional outputs from ``labels_row_`` and ``labels_col_``.

        Boolean data is cast to float first. Depending on
        ``return_membership`` and ``return_aggregate``, sets
        ``membership_row_`` / ``membership_col_`` / ``membership_`` and
        ``biadjacency_``. Returns ``self``.
        """
        if not (self.return_membership or self.return_aggregate):
            return self

        if np.issubdtype(biadjacency.data.dtype, np.bool_):
            biadjacency = biadjacency.astype(float)

        # Row and column cluster matrices share one label range so that they
        # have the same number of columns.
        n_labels = max(max(self.labels_row_), max(self.labels_col_)) + 1
        row_clusters = membership_matrix(self.labels_row_, n_labels=n_labels)
        col_clusters = membership_matrix(self.labels_col_, n_labels=n_labels)

        if self.return_membership:
            self.membership_row_ = normalize(biadjacency.dot(col_clusters))
            self.membership_col_ = normalize(biadjacency.T.dot(row_clusters))
            self.membership_ = self.membership_row_

        if self.return_aggregate:
            aggregated = sparse.csr_matrix(row_clusters.T.dot(biadjacency))
            self.biadjacency_ = aggregated.dot(col_clusters)

        return self
Example #11
0
    def fit(self, adjacency: sparse.csr_matrix):
        """Embedding of bipartite graphs from a clustering obtained with Louvain.

        The graph is clustered with :class:`Louvain`. By default the embedding
        is the membership matrix returned by the clustering. When
        ``self.isolated_nodes`` is ``'remove'`` or ``'merge'``, clusters of
        size one are relabelled (``-1`` for ``'remove'``, one shared extra
        label for ``'merge'``) and the embedding is recomputed from the
        relabelled clusters. ``self.labels_`` keeps the labels returned by the
        clustering.

        Parameters
        ----------
        adjacency:
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiLouvainEmbedding`
        """
        louvain = Louvain(
            resolution=self.resolution,
            modularity=self.modularity,
            tol_optimization=self.tol_optimization,
            tol_aggregation=self.tol_aggregation,
            n_aggregations=self.n_aggregations,
            shuffle_nodes=self.shuffle_nodes,
            sort_clusters=True,
            return_membership=True,
            return_aggregate=True,
            random_state=self.random_state,
        )
        louvain.fit(adjacency)
        self.labels_ = louvain.labels_

        embedding = louvain.membership_

        if self.isolated_nodes in ('remove', 'merge'):
            # Clusters with more than one node are kept and renumbered
            # 0..k-1; every other label maps to the fill value.
            cluster_ids, sizes = np.unique(louvain.labels_, return_counts=True)
            kept = cluster_ids[sizes > 1]
            fill = -1 if self.isolated_nodes == 'remove' else len(kept)
            relabel = np.full(max(cluster_ids) + 1, fill, dtype='int')
            relabel[kept] = np.arange(len(kept))
            new_labels = relabel[louvain.labels_]

            # Recompute the embedding from the relabelled clusters.
            embedding = normalize(adjacency).dot(membership_matrix(new_labels))

        self.embedding_ = embedding.toarray()
        return self
Example #12
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) \
            -> 'Propagation':
        """Node classification by label propagation.

        Seed labels are fixed. Each remaining node repeatedly takes the most
        frequent label among its already-labelled neighbours, until the labels
        of the remaining nodes stop changing or ``self.n_iter`` passes have
        been made. Labels are updated in place, so a node sees the labels
        assigned earlier in the same pass.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.
        seeds :
            Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency = check_format(adjacency)
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instanciate_vars(
            adjacency, seeds)

        # -1 marks a node without label.
        labels = np.full(n, -1, dtype=int)
        labels[index_seed] = labels_seed
        previous = np.zeros_like(index_remain, dtype=int)

        t = 0
        while t < self.n_iter:
            if np.array_equal(previous, labels[index_remain]):
                break
            t += 1
            previous = labels[index_remain].copy()
            for node in index_remain:
                start = adjacency.indptr[node]
                end = adjacency.indptr[node + 1]
                neighbor_labels = labels[adjacency.indices[start:end]]
                known = neighbor_labels[neighbor_labels >= 0]
                if len(known):
                    values, votes = np.unique(known, return_counts=True)
                    labels[node] = values[np.argmax(votes)]

        self.labels_ = labels
        self.membership_ = normalize(adjacency.dot(membership_matrix(labels)))

        return self
Example #13
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) \
            -> 'Propagation':
        """Node classification by label propagation.

        Seed labels are fixed; the remaining nodes are repeatedly relabelled
        by ``vote_update`` until their labels stop changing or ``self.n_iter``
        passes have been made.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency = check_format(adjacency)
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instanciate_vars(
            adjacency, seeds)

        # -1 marks a node without label.
        labels = np.full(n, -1, dtype=np.int32)
        labels[index_seed] = labels_seed
        previous = np.zeros_like(index_remain, dtype=np.int32)

        # CSR structure cast to 32-bit integers before the call to vote_update.
        indptr = adjacency.indptr.astype(np.int32)
        indices = adjacency.indices.astype(np.int32)

        t = 0
        while t < self.n_iter:
            if np.array_equal(previous, labels[index_remain]):
                break
            t += 1
            previous = labels[index_remain].copy()
            labels = vote_update(indptr, indices, labels, index_remain)

        self.labels_ = labels
        self.membership_ = normalize(adjacency.dot(membership_matrix(labels)))

        return self
Example #14
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Louvain':
        """Fit algorithm to the data.

        Alternates optimization passes (``_optimize``) and aggregation of the
        clusters into a smaller graph (``_aggregate``) until the increase of a
        pass is at most ``self.tol_aggregation``, a single cluster remains or
        ``self.n_aggregations`` aggregations have been done.

        When ``self.shuffle_nodes`` is set, the algorithm runs on a randomly
        permuted copy of the graph and the labels are mapped back to the input
        order afterwards.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Louvain`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n_nodes = adjacency.shape[0]

        # Keep ``adjacency`` in input order for the secondary outputs and work
        # on a separate, possibly permuted, matrix.
        nodes = np.arange(n_nodes)
        adjacency_work = adjacency
        if self.shuffle_nodes:
            nodes = self.random_state.permutation(nodes)
            adjacency_work = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

        # Computed after the permutation so that the probabilities follow the
        # same node order as the matrix being optimized.
        probs_out = check_probs('degree', adjacency_work)
        probs_in = check_probs('degree', adjacency_work.T)

        adjacency_norm = adjacency_work / adjacency_work.data.sum()

        membership = sparse.identity(n_nodes, format='csr')
        increase = True
        count_aggregations = 0
        self.log.print("Starting with", n_nodes, "nodes.")
        while increase:
            count_aggregations += 1

            current_labels, pass_increase = self._optimize(
                n_nodes, adjacency_norm, probs_out, probs_in)
            # Renumber the labels of this pass as 0..k-1.
            _, current_labels = np.unique(current_labels, return_inverse=True)

            if pass_increase <= self.tol_aggregation:
                increase = False
            else:
                membership_agg = membership_matrix(current_labels)
                membership = membership.dot(membership_agg)
                n_nodes, adjacency_norm, probs_out, probs_in = self._aggregate(
                    adjacency_norm, probs_out, probs_in, membership_agg)

                if n_nodes == 1:
                    break
            self.log.print("Aggregation", count_aggregations, "completed with",
                           n_nodes, "clusters and ", pass_increase,
                           "increment.")
            if count_aggregations == self.n_aggregations:
                break

        if self.sort_clusters:
            labels = reindex_labels(membership.indices)
        else:
            labels = membership.indices
        if self.shuffle_nodes:
            # Inverse permutation: map labels back to the input node order.
            reverse = np.empty(nodes.size, nodes.dtype)
            reverse[nodes] = np.arange(nodes.size)
            labels = labels[reverse]

        self.labels_ = labels
        # The labels are in input order, so the matrix must be too.
        self._secondary_outputs(adjacency)

        return self
Example #15
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Louvain':
        """Fit algorithm to the data.

        Alternates optimization passes (``_optimize``) and aggregation of the
        clusters into a smaller graph (``_aggregate``) until the increase of a
        pass is at most ``self.tol_aggregation``, a single cluster remains or
        ``self.n_aggregations`` aggregations have been done.

        When ``self.shuffle_nodes`` is set, the algorithm runs on a randomly
        permuted copy of the graph and the labels are mapped back to the input
        order afterwards.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Louvain`

        Raises
        ------
        ValueError
            If ``self.modularity`` is not ``'potts'``, ``'newman'`` or ``'dugue'``.
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n = adjacency.shape[0]

        # Validate before touching the random state.
        if self.modularity not in ('potts', 'newman', 'dugue'):
            raise ValueError('Unknown modularity function.')

        # Keep ``adjacency`` in input order for the secondary outputs and work
        # on a separate, possibly permuted, matrix.
        nodes = np.arange(n, dtype=np.int32)
        adjacency_work = adjacency
        if self.shuffle_nodes:
            nodes = self.random_state.permutation(nodes)
            adjacency_work = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

        # Computed after the permutation so that the probabilities follow the
        # same node order as the matrix being optimized.
        if self.modularity == 'potts':
            probs_ou = check_probs('uniform', adjacency_work)
            probs_in = probs_ou.copy()
        elif self.modularity == 'newman':
            probs_ou = check_probs('degree', adjacency_work)
            probs_in = probs_ou.copy()
        else:  # 'dugue'
            probs_ou = check_probs('degree', adjacency_work)
            probs_in = check_probs('degree', adjacency_work.T)

        adjacency_clust = adjacency_work / adjacency_work.data.sum()

        membership = sparse.identity(n, format='csr')
        increase = True
        count_aggregations = 0
        self.log.print("Starting with", n, "nodes.")
        while increase:
            count_aggregations += 1

            labels_clust, pass_increase = self._optimize(
                adjacency_clust, probs_ou, probs_in)
            # Renumber the labels of this pass as 0..k-1.
            _, labels_clust = np.unique(labels_clust, return_inverse=True)

            if pass_increase <= self.tol_aggregation:
                increase = False
            else:
                membership_clust = membership_matrix(labels_clust)
                membership = membership.dot(membership_clust)
                adjacency_clust, probs_ou, probs_in = self._aggregate(
                    adjacency_clust, probs_ou, probs_in, membership_clust)

                n = adjacency_clust.shape[0]
                if n == 1:
                    break
            self.log.print("Aggregation", count_aggregations, "completed with",
                           n, "clusters and ", pass_increase, "increment.")
            if count_aggregations == self.n_aggregations:
                break

        if self.sort_clusters:
            labels = reindex_labels(membership.indices)
        else:
            labels = membership.indices
        if self.shuffle_nodes:
            # Inverse permutation: map labels back to the input node order.
            reverse = np.empty(nodes.size, nodes.dtype)
            reverse[nodes] = np.arange(nodes.size)
            labels = labels[reverse]

        self.labels_ = labels
        # The labels are in input order, so the matrix must be too.
        self._secondary_outputs(adjacency)

        return self
Example #16
0
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
        """Fit algorithm to data.

        Alternates optimization passes (``_optimize``) and aggregation of the
        clusters into a smaller graph (``_aggregate``) until the increase of a
        pass is at most ``self.tol_aggregation``, a single cluster remains or
        ``self.n_aggregations`` aggregations have been done.

        When ``self.shuffle_nodes`` is set, the algorithm runs on a randomly
        permuted copy of the graph and the labels are mapped back to the
        original order afterwards.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self: :class:`Louvain`

        Raises
        ------
        ValueError
            If ``self.modularity`` is not ``'potts'``, ``'newman'`` or ``'dugue'``.
        """
        self._init_vars()

        if self.modularity == 'dugue':
            adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=True,
                                                      force_bipartite=force_bipartite)
        else:
            adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        n = adjacency.shape[0]

        # Validate before touching the random state.
        if self.modularity not in ('potts', 'newman', 'dugue'):
            raise ValueError('Unknown modularity function.')

        nodes = np.arange(n)
        if self.shuffle_nodes:
            nodes = self.random_state.permutation(nodes)
            adjacency = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

        # Computed after the permutation so that the probabilities follow the
        # same node order as the matrix being optimized.
        if self.modularity == 'potts':
            probs_out = get_probs('uniform', adjacency)
            probs_in = probs_out.copy()
        elif self.modularity == 'newman':
            probs_out = get_probs('degree', adjacency)
            probs_in = probs_out.copy()
        else:  # 'dugue'
            probs_out = get_probs('degree', adjacency)
            probs_in = get_probs('degree', adjacency.T)

        adjacency_cluster = adjacency / adjacency.data.sum()

        membership = sparse.identity(n, format='csr')
        increase = True
        count_aggregations = 0
        self.log.print("Starting with", n, "nodes.")
        while increase:
            count_aggregations += 1

            labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
            # Renumber the labels of this pass as 0..k-1.
            _, labels_cluster = np.unique(labels_cluster, return_inverse=True)

            if pass_increase <= self.tol_aggregation:
                increase = False
            else:
                membership_cluster = membership_matrix(labels_cluster)
                membership = membership.dot(membership_cluster)
                adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
                                                                        membership_cluster)

                n = adjacency_cluster.shape[0]
                if n == 1:
                    break
            self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
                           pass_increase, "increment.")
            if count_aggregations == self.n_aggregations:
                break

        if self.sort_clusters:
            labels = reindex_labels(membership.indices)
        else:
            labels = membership.indices
        if self.shuffle_nodes:
            # Inverse permutation: map labels back to the original node order.
            reverse = np.empty(nodes.size, nodes.dtype)
            reverse[nodes] = np.arange(nodes.size)
            labels = labels[reverse]

        self.labels_ = labels
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        self._secondary_outputs(input_matrix)

        return self
Example #17
0
    def fit(self, biadjacency: Union[sparse.csr_matrix,
                                     np.ndarray]) -> 'BiKMeans':
        """Apply embedding method followed by clustering to the graph.

        The graph is embedded with ``self.embedding_method`` and the embedding
        is clustered with ``KMeansDense``. With ``self.co_cluster`` the row
        and column embeddings are stacked and clustered together; otherwise
        only ``embedding_`` is clustered and the labels apply to the rows.
        Memberships and the aggregate biadjacency matrix are set when
        ``self.return_membership`` / ``self.return_aggregate`` are true.

        Parameters
        ----------
        biadjacency:
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiKMeans`
        """
        n_row, _ = biadjacency.shape
        check_n_clusters(self.n_clusters, n_row)

        method = self.embedding_method
        method.fit(biadjacency)

        embedding = method.embedding_
        if self.co_cluster:
            embedding = np.vstack(
                (method.embedding_row_, method.embedding_col_))

        kmeans = KMeansDense(self.n_clusters)
        kmeans.fit(embedding)

        labels = kmeans.labels_
        if self.sort_clusters:
            labels = reindex_labels(labels)

        self.labels_ = labels
        if self.co_cluster:
            self._split_vars(n_row)
        else:
            self.labels_row_ = labels

        if not (self.return_membership or self.return_aggregate):
            return self

        # Cluster indicator matrices; columns have one only when
        # ``labels_col_`` is set.
        row_clusters = membership_matrix(self.labels_row_,
                                         n_labels=self.n_clusters)
        col_clusters = None
        if self.labels_col_ is not None:
            col_clusters = membership_matrix(self.labels_col_,
                                             n_labels=self.n_clusters)

        if self.return_membership:
            if col_clusters is None:
                # No column labels: go through the columns and back to the
                # row clusters.
                self.membership_row_ = normalize(
                    biadjacency.dot(biadjacency.T.dot(row_clusters)))
            else:
                self.membership_row_ = normalize(
                    biadjacency.dot(col_clusters))
                self.membership_col_ = normalize(
                    biadjacency.T.dot(row_clusters))
            self.membership_ = self.membership_row_

        if self.return_aggregate:
            aggregated = sparse.csr_matrix(row_clusters.T.dot(biadjacency))
            if col_clusters is not None:
                aggregated = aggregated.dot(col_clusters)
            self.biadjacency_ = aggregated

        return self
Example #18
0
    def fit(self, biadjacency: sparse.csr_matrix):
        """Embedding of bipartite graphs from a clustering obtained with Louvain.

        The graph is clustered with :class:`BiLouvain` and the embeddings are
        the membership matrices returned by that clustering. When
        ``self.merge_isolated`` is true, the columns of each embedding that
        correspond to the last ``k`` cluster ids are summed into one column,
        where ``k`` is the number of clusters holding exactly one row
        (resp. one column).

        Parameters
        ----------
        biadjacency:
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiLouvainEmbedding`
        """
        bilouvain = BiLouvain(resolution=self.resolution,
                              modularity=self.modularity,
                              tol_optimization=self.tol_optimization,
                              tol_aggregation=self.tol_aggregation,
                              n_aggregations=self.n_aggregations,
                              shuffle_nodes=self.shuffle_nodes,
                              sort_clusters=True,
                              return_membership=True,
                              return_aggregate=True,
                              random_state=self.random_state)
        bilouvain.fit(biadjacency)

        self.labels_ = bilouvain.labels_

        embedding_row = bilouvain.membership_row_
        embedding_col = bilouvain.membership_col_

        if self.merge_isolated:
            # Number of row clusters that contain exactly one row.
            _, counts_row = np.unique(bilouvain.labels_row_,
                                      return_counts=True)
            n_isolated_nodes_row = (counts_row == 1).sum()
            if n_isolated_nodes_row:
                # Keep one embedding column per distinct row label.
                # NOTE(review): ``embedding_row`` is ``bilouvain.membership_row_``
                # itself; presumably ``resize`` works in place and therefore
                # alters that attribute too - confirm.
                size_row = (biadjacency.shape[0], len(counts_row))
                embedding_row.resize(size_row)
                # No copy is taken: the assignment below also modifies
                # ``bilouvain.labels_row_``.
                labels_row = bilouvain.labels_row_
                # NOTE(review): this overwrites the labels of the last
                # ``n_isolated_nodes_row`` *nodes* (positions in the per-node
                # array), not the nodes carrying the last cluster ids; looks
                # like it assumes isolated nodes sit at the end - confirm.
                labels_row[-n_isolated_nodes_row:] = labels_row[
                    -n_isolated_nodes_row]
                # Map the last ``n_isolated_nodes_row`` cluster ids to a single id.
                # NOTE(review): presumably relies on ``sort_clusters=True``
                # giving single-node clusters the highest ids - confirm.
                merge_labels_row = np.arange(len(counts_row), dtype=int)
                merge_labels_row[-n_isolated_nodes_row:] = merge_labels_row[
                    -n_isolated_nodes_row]
                combiner_row = membership_matrix(merge_labels_row)
                # Sum the embedding columns of the merged clusters.
                embedding_row = embedding_row.dot(combiner_row)
                # From here on ``labels_`` holds the modified row labels.
                self.labels_ = labels_row

            # Same merge for the column side (column labels are left untouched).
            _, counts_col = np.unique(bilouvain.labels_col_,
                                      return_counts=True)
            n_isolated_nodes_col = (counts_col == 1).sum()
            if n_isolated_nodes_col:
                size_col = (biadjacency.shape[1], len(counts_col))
                embedding_col.resize(size_col)
                merge_labels_col = np.arange(embedding_col.shape[1], dtype=int)
                merge_labels_col[-n_isolated_nodes_col:] = merge_labels_col[
                    -n_isolated_nodes_col]
                combiner_col = membership_matrix(merge_labels_col)
                embedding_col = embedding_col.dot(combiner_col)

        # Dense copies of the (possibly merged) sparse embeddings.
        self.embedding_row_ = embedding_row.toarray()
        self.embedding_col_ = embedding_col.toarray()
        self.embedding_ = self.embedding_row_

        return self
Example #19
0
def modularity(adjacency: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray,
               weights: Union[str, np.ndarray] = 'degree', weights_in: Union[str, np.ndarray] = 'degree',
               resolution: float = 1, return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering (node partition).

    * Graphs
    * Digraphs

    The modularity of a clustering is

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w_iw_j}{w^2}\\right)\\delta_{c_i,c_j}`
    for graphs,

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)\\delta_{c_i,c_j}`
    for digraphs,

    where

    * :math:`c_i` is the cluster of node :math:`i`,\n
    * :math:`w_i` is the weight of node :math:`i`,\n
    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    The returned value is ``fit - resolution * diversity``, where ``fit`` is the first term of the sum
    and ``diversity`` is the second term without the factor :math:`\\gamma`.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph (must be square).
    labels:
        Labels of nodes, vector of size :math:`n` .
    weights :
        Weights of nodes.
        ``'degree'`` (default), ``'uniform'`` or custom weights.
    weights_in :
        In-weights of nodes.
        ``'degree'`` (default), ``'uniform'`` or custom weights.
    resolution:
        Resolution parameter (default = 1).
    return_all:
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit: float, optional
    diversity: float, optional

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows of the adjacency matrix.

    Example
    -------
    >>> from sknetwork.clustering import modularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> np.round(modularity(adjacency, labels), 2)
    0.11
    """
    adjacency = check_format(adjacency).astype(float)
    check_square(adjacency)

    if len(labels) != adjacency.shape[0]:
        raise ValueError('Dimension mismatch between labels and adjacency matrix.')

    # Node weights are taken on the rows; in-weights are taken on the rows of the transpose.
    probs_row = check_probs(weights, adjacency)
    probs_col = check_probs(weights_in, adjacency.T)
    membership = membership_matrix(labels)

    # fit: fraction of the total edge weight that falls inside clusters.
    fit: float = membership.multiply(adjacency.dot(membership)).data.sum() / adjacency.data.sum()
    # div: sum over clusters of (in-weight mass of the cluster) * (weight mass of the cluster).
    div: float = membership.T.dot(probs_col).dot(membership.T.dot(probs_row))
    mod: float = fit - resolution * div
    if return_all:
        return mod, fit, div
    else:
        return mod
Example #20
0
def comodularity(adjacency: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray, resolution: float = 1,
                 return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering in the normalized co-neighborhood graph.

    * Graphs
    * Digraphs
    * Bigraphs

    Quality metric of a clustering given by:

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{(AD_2^{-1}A^T)_{ij}}{w} - \\gamma \\dfrac{d_id_j}{w^2}\\right)
    \\delta_{c_i,c_j}`

    where

    * :math:`c_i` is the cluster of node `i`,\n
    * :math:`d_i` is the weight of row :math:`i` (entry :math:`i` of :math:`A1`),\n
    * :math:`D_2` is the diagonal matrix of column weights (:math:`A^T1`),\n
    * :math:`w = 1^TA1` is the total weight,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph, or biadjacency matrix (not necessarily square) of a bipartite graph.
    labels :
       Labels of the nodes (one per row of the matrix).
    resolution :
        Resolution parameter (default = 1).
    return_all :
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit : float, optional
    diversity: float, optional

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows of the matrix.

    Example
    -------
    >>> from sknetwork.clustering import comodularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> np.round(comodularity(adjacency, labels), 2)
    0.06

    Notes
    -----
    Does not require the computation of the adjacency matrix of the normalized co-neighborhood graph.
    """

    adjacency = check_format(adjacency).astype(float)
    n_row, n_col = adjacency.shape

    # Validate the input before doing any numerical work.
    if len(labels) != n_row:
        raise ValueError('The number of labels must match the number of rows.')

    total_weight = adjacency.data.sum()
    probs = adjacency.dot(np.ones(n_col)) / total_weight

    # Column weights sum over the rows: the transpose has shape (n_col, n_row), so the
    # vector of ones must have length n_row (using n_col only works for square matrices).
    weights_col = adjacency.T.dot(np.ones(n_row))
    diag_col = diag_pinv(np.sqrt(weights_col))
    # Rows of this matrix are the columns of A scaled by 1 / sqrt(column weight).
    normalized_adjacency = (adjacency.dot(diag_col)).T.tocsr()

    membership = membership_matrix(labels)
    # The sum of squared entries gives the within-cluster weight of A D_2^{-1} A^T
    # without ever forming that matrix.
    fit: float = ((normalized_adjacency.dot(membership)).data ** 2).sum() / total_weight
    div: float = np.linalg.norm(membership.T.dot(probs)) ** 2
    mod: float = fit - resolution * div

    if return_all:
        return mod, fit, div
    else:
        return mod