Esempio n. 1
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'KMeans':
        """Embed the graph, then cluster the embedding with K-means.

        Parameters
        ----------
        adjacency:
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`KMeans`
            The fitted instance, with ``labels_`` set.
        """
        # The requested number of clusters is checked against the number of rows.
        check_n_clusters(self.n_clusters, adjacency.shape[0])

        # Embedding first, clustering second.
        node_embedding = self.embedding_method.fit_transform(adjacency)
        clusterer = KMeansDense(self.n_clusters)
        clusterer.fit(node_embedding)

        # Optionally re-index the labels through ``reindex_labels``.
        self.labels_ = reindex_labels(clusterer.labels_) if self.sort_clusters else clusterer.labels_
        self._secondary_outputs(adjacency)

        return self
Esempio n. 2
0
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'KMeans':
        """Apply embedding method followed by K-means.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`KMeans`
            The fitted instance, with ``labels_`` and ``bipartite`` set.
        """
        self._init_vars()

        # input
        # Keep the matrix returned by ``check_format`` instead of discarding it,
        # so that every later step works on the checked matrix.
        # NOTE(review): assumes ``check_format`` returns the (possibly converted)
        # matrix — confirm against its definition.
        input_matrix = check_format(input_matrix)
        if self.co_cluster:
            # Rows and columns are clustered together: the bound is their total.
            check_n_clusters(self.n_clusters, np.sum(input_matrix.shape))
        else:
            check_n_clusters(self.n_clusters, input_matrix.shape[0])

        # embedding
        embedding, self.bipartite = get_embedding(input_matrix, self.embedding_method, self.co_cluster)

        # clustering
        kmeans = KMeansDense(self.n_clusters)
        kmeans.fit(embedding)

        # sort
        if self.sort_clusters:
            labels = reindex_labels(kmeans.labels_)
        else:
            labels = kmeans.labels_

        # output
        self.labels_ = labels
        if self.co_cluster:
            self._split_vars(input_matrix.shape)
        self._secondary_outputs(input_matrix)

        return self
Esempio n. 3
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Louvain':
        """Fit algorithm to the data.

        Alternates optimization passes (``_optimize``) and aggregation of the
        resulting clusters (``_aggregate``) until the increase of a pass does
        not exceed ``tol_aggregation``, a single cluster remains, or
        ``n_aggregations`` passes have been performed.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Louvain`
            The fitted instance, with ``labels_`` given in the original node
            order.
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n_nodes = adjacency.shape[0]

        # The optimization runs on a (possibly permuted) working copy; the
        # original matrix is kept for the secondary outputs, which are computed
        # from labels expressed in the original node order.
        nodes = np.arange(n_nodes)
        if self.shuffle_nodes:
            nodes = self.random_state.permutation(nodes)
            adjacency_work = adjacency[nodes, :].tocsc()[:, nodes].tocsr()
        else:
            adjacency_work = adjacency

        # Probabilities are derived from the working matrix so that entry i
        # refers to the same node as row/column i of ``adjacency_norm``.
        probs_out = check_probs('degree', adjacency_work)
        probs_in = check_probs('degree', adjacency_work.T)

        adjacency_norm = adjacency_work / adjacency_work.data.sum()

        # ``membership`` maps each (working-order) node to its current cluster.
        membership = sparse.identity(n_nodes, format='csr')
        increase = True
        count_aggregations = 0
        self.log.print("Starting with", n_nodes, "nodes.")
        while increase:
            count_aggregations += 1

            current_labels, pass_increase = self._optimize(
                n_nodes, adjacency_norm, probs_out, probs_in)
            # Relabel clusters as consecutive integers starting from 0.
            _, current_labels = np.unique(current_labels, return_inverse=True)

            if pass_increase <= self.tol_aggregation:
                increase = False
            else:
                membership_agg = membership_matrix(current_labels)
                membership = membership.dot(membership_agg)
                n_nodes, adjacency_norm, probs_out, probs_in = self._aggregate(
                    adjacency_norm, probs_out, probs_in, membership_agg)

                if n_nodes == 1:
                    break
            self.log.print("Aggregation", count_aggregations, "completed with",
                           n_nodes, "clusters and ", pass_increase,
                           "increment.")
            if count_aggregations == self.n_aggregations:
                break

        if self.sort_clusters:
            labels = reindex_labels(membership.indices)
        else:
            labels = membership.indices
        if self.shuffle_nodes:
            # Invert the permutation to express labels in the original order.
            reverse = np.empty(nodes.size, nodes.dtype)
            reverse[nodes] = np.arange(nodes.size)
            labels = labels[reverse]

        self.labels_ = labels
        # Pass the unshuffled matrix: ``labels_`` is in the original node order.
        self._secondary_outputs(adjacency)

        return self
Esempio n. 4
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Louvain':
        """Fit algorithm to the data.

        Alternates optimization passes (``_optimize``) and aggregation of the
        resulting clusters (``_aggregate``) until the increase of a pass does
        not exceed ``tol_aggregation``, a single cluster remains, or
        ``n_aggregations`` passes have been performed.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Louvain`
            The fitted instance, with ``labels_`` given in the original node
            order.

        Raises
        ------
        ValueError
            If ``self.modularity`` is not ``'potts'``, ``'newman'`` or
            ``'dugue'``.
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n = adjacency.shape[0]

        # Reject unknown settings before doing any work (and before consuming
        # the random state for the shuffle).
        if self.modularity not in ('potts', 'newman', 'dugue'):
            raise ValueError('Unknown modularity function.')

        # The optimization runs on a (possibly permuted) working copy; the
        # original matrix is kept for the secondary outputs, which are computed
        # from labels expressed in the original node order.
        nodes = np.arange(n, dtype=np.int32)
        if self.shuffle_nodes:
            nodes = self.random_state.permutation(nodes)
            adjacency_work = adjacency[nodes, :].tocsc()[:, nodes].tocsr()
        else:
            adjacency_work = adjacency

        # Probabilities are derived from the working matrix so that entry i
        # refers to the same node as row/column i of ``adjacency_clust``.
        if self.modularity == 'potts':
            probs_ou = check_probs('uniform', adjacency_work)
            probs_in = probs_ou.copy()
        elif self.modularity == 'newman':
            probs_ou = check_probs('degree', adjacency_work)
            probs_in = probs_ou.copy()
        else:  # 'dugue'
            probs_ou = check_probs('degree', adjacency_work)
            probs_in = check_probs('degree', adjacency_work.T)

        adjacency_clust = adjacency_work / adjacency_work.data.sum()

        # ``membership`` maps each (working-order) node to its current cluster.
        membership = sparse.identity(n, format='csr')
        increase = True
        count_aggregations = 0
        self.log.print("Starting with", n, "nodes.")
        while increase:
            count_aggregations += 1

            labels_clust, pass_increase = self._optimize(
                adjacency_clust, probs_ou, probs_in)
            # Relabel clusters as consecutive integers starting from 0.
            _, labels_clust = np.unique(labels_clust, return_inverse=True)

            if pass_increase <= self.tol_aggregation:
                increase = False
            else:
                membership_clust = membership_matrix(labels_clust)
                membership = membership.dot(membership_clust)
                adjacency_clust, probs_ou, probs_in = self._aggregate(
                    adjacency_clust, probs_ou, probs_in, membership_clust)

                n = adjacency_clust.shape[0]
                if n == 1:
                    break
            self.log.print("Aggregation", count_aggregations, "completed with",
                           n, "clusters and ", pass_increase, "increment.")
            if count_aggregations == self.n_aggregations:
                break

        if self.sort_clusters:
            labels = reindex_labels(membership.indices)
        else:
            labels = membership.indices
        if self.shuffle_nodes:
            # Invert the permutation to express labels in the original order.
            reverse = np.empty(nodes.size, nodes.dtype)
            reverse[nodes] = np.arange(nodes.size)
            labels = labels[reverse]

        self.labels_ = labels
        # Pass the unshuffled matrix: ``labels_`` is in the original node order.
        self._secondary_outputs(adjacency)

        return self
Esempio n. 5
0
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
        """Fit algorithm to data.

        Alternates optimization passes (``_optimize``) and aggregation of the
        resulting clusters (``_aggregate``) until the increase of a pass does
        not exceed ``tol_aggregation``, a single cluster remains, or
        ``n_aggregations`` passes have been performed.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self: :class:`Louvain`
            The fitted instance, with ``labels_`` given in the original node
            order.

        Raises
        ------
        ValueError
            If ``self.modularity`` is not ``'potts'``, ``'newman'`` or
            ``'dugue'``.
        """
        self._init_vars()

        if self.modularity == 'dugue':
            adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=True,
                                                      force_bipartite=force_bipartite)
        else:
            adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        n = adjacency.shape[0]

        # Reject unknown settings before consuming the random state.
        if self.modularity not in ('potts', 'newman', 'dugue'):
            raise ValueError('Unknown modularity function.')

        nodes = np.arange(n)
        if self.shuffle_nodes:
            nodes = self.random_state.permutation(nodes)
            adjacency = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

        # Probabilities are computed after the shuffle so that entry i refers
        # to the same node as row/column i of ``adjacency_cluster``.
        if self.modularity == 'potts':
            probs_out = get_probs('uniform', adjacency)
            probs_in = probs_out.copy()
        elif self.modularity == 'newman':
            probs_out = get_probs('degree', adjacency)
            probs_in = probs_out.copy()
        else:  # 'dugue'
            probs_out = get_probs('degree', adjacency)
            probs_in = get_probs('degree', adjacency.T)

        adjacency_cluster = adjacency / adjacency.data.sum()

        # ``membership`` maps each (working-order) node to its current cluster.
        membership = sparse.identity(n, format='csr')
        increase = True
        count_aggregations = 0
        self.log.print("Starting with", n, "nodes.")
        while increase:
            count_aggregations += 1

            labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
            # Relabel clusters as consecutive integers starting from 0.
            _, labels_cluster = np.unique(labels_cluster, return_inverse=True)

            if pass_increase <= self.tol_aggregation:
                increase = False
            else:
                membership_cluster = membership_matrix(labels_cluster)
                membership = membership.dot(membership_cluster)
                adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
                                                                        membership_cluster)

                n = adjacency_cluster.shape[0]
                if n == 1:
                    break
            self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
                           pass_increase, "increment.")
            if count_aggregations == self.n_aggregations:
                break

        if self.sort_clusters:
            labels = reindex_labels(membership.indices)
        else:
            labels = membership.indices
        if self.shuffle_nodes:
            # Invert the permutation to express labels in the original order.
            reverse = np.empty(nodes.size, nodes.dtype)
            reverse[nodes] = np.arange(nodes.size)
            labels = labels[reverse]

        self.labels_ = labels
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        self._secondary_outputs(input_matrix)

        return self
Esempio n. 6
0
    def fit(self, biadjacency: Union[sparse.csr_matrix,
                                     np.ndarray]) -> 'BiKMeans':
        """Apply embedding method followed by clustering to the graph.

        Sets ``labels_`` (and ``labels_row_`` when not co-clustering;
        ``_split_vars`` is called when co-clustering). Depending on
        ``return_membership`` and ``return_aggregate``, also sets
        ``membership_row_``, ``membership_col_``, ``membership_`` and
        ``biadjacency_``.

        Parameters
        ----------
        biadjacency:
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiKMeans`
        """
        n_row, n_col = biadjacency.shape
        # With co-clustering, row and column embeddings are stacked and
        # clustered together, so the bound on the number of clusters is the
        # total number of rows and columns rather than the rows alone.
        n_points = n_row + n_col if self.co_cluster else n_row
        check_n_clusters(self.n_clusters, n_points)

        method = self.embedding_method
        method.fit(biadjacency)

        if self.co_cluster:
            embedding = np.vstack(
                (method.embedding_row_, method.embedding_col_))
        else:
            embedding = method.embedding_

        kmeans = KMeansDense(self.n_clusters)
        kmeans.fit(embedding)

        if self.sort_clusters:
            labels = reindex_labels(kmeans.labels_)
        else:
            labels = kmeans.labels_

        self.labels_ = labels
        if self.co_cluster:
            self._split_vars(n_row)
        else:
            self.labels_row_ = labels

        if self.return_membership or self.return_aggregate:
            # Shared by both optional outputs; built once.
            membership_row = membership_matrix(self.labels_row_,
                                               n_labels=self.n_clusters)

        if self.return_membership:
            if self.labels_col_ is not None:
                membership_col = membership_matrix(self.labels_col_,
                                                   n_labels=self.n_clusters)
                self.membership_row_ = normalize(
                    biadjacency.dot(membership_col))
                self.membership_col_ = normalize(
                    biadjacency.T.dot(membership_row))
            else:
                # No column labels: go through the columns and back to rows.
                self.membership_row_ = normalize(
                    biadjacency.dot(biadjacency.T.dot(membership_row)))
            self.membership_ = self.membership_row_

        if self.return_aggregate:
            biadjacency_ = sparse.csr_matrix(membership_row.T.dot(biadjacency))
            if self.labels_col_ is not None:
                membership_col = membership_matrix(self.labels_col_,
                                                   n_labels=self.n_clusters)
                biadjacency_ = biadjacency_.dot(membership_col)
            self.biadjacency_ = biadjacency_

        return self