Esempio n. 1
0
    def fit(self, input_matrix: Union[sparse.csr_matrix,
                                      np.ndarray]) -> 'Ward':
        """Embed the nodes of the graph, then cluster the embedding with the Ward algorithm.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`Ward`
        """
        self._init_vars()

        # Keep the validated matrix (the result used to be discarded) so that
        # every later step works on the checked input, not on the raw argument.
        input_matrix = check_format(input_matrix)

        # Embedding of the nodes; the second returned value is stored as ``self.bipartite``.
        embedding, self.bipartite = get_embedding(input_matrix,
                                                  self.embedding_method,
                                                  self.co_cluster)

        # Hierarchical clustering of the embedding vectors.
        ward = WardDense()
        self.dendrogram_ = ward.fit_transform(embedding)

        # In co-clustering mode, the outputs are split using the shape of the input.
        if self.co_cluster:
            self._split_vars(input_matrix.shape)

        return self
Esempio n. 2
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'CoPageRank':
        """Compute PageRank scores of rows and columns through co-neighbor operators.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix.
        seeds_row :
            Seed rows, given as a dict or as a vector.
        seeds_col :
            Seed columns, given as a dict or as a vector.
            If both seeds_row and seeds_col are ``None``, the uniform distribution is used.

        Returns
        -------
        self: :class:`CoPageRank`
        """
        biadjacency = check_format(biadjacency)
        n_row, n_col = biadjacency.shape

        # Rows: operator built from the biadjacency matrix itself.
        row_operator = CoNeighborsOperator(biadjacency, True)
        row_probs = seeds2probs(n_row, seeds_row)
        # Solver settings shared by the two PageRank computations.
        pagerank_params = dict(damping_factor=self.damping_factor, solver=self.solver,
                               n_iter=self.n_iter, tol=self.tol)
        self.scores_row_ = get_pagerank(row_operator, row_probs, **pagerank_params)

        # Columns: same computation on the transposed biadjacency matrix.
        col_operator = CoNeighborsOperator(biadjacency.T.tocsr(), True)
        col_probs = seeds2probs(n_col, seeds_col)
        self.scores_col_ = get_pagerank(col_operator, col_probs, **pagerank_params)

        # The default scores are those of the rows.
        self.scores_ = self.scores_row_

        return self
Esempio n. 3
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'PCA':
        """Fit the embedding from the singular vectors of the column-centered matrix.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`PCA`
        """
        adjacency = check_format(adjacency).asfptype()
        n_row, _ = adjacency.shape

        # Centering term: minus a vector of ones paired with the column means.
        # The pair is handed to SparseLR together with the original matrix.
        minus_ones = -np.ones(n_row)
        col_means = adjacency.T.dot(np.ones(n_row)) / n_row
        adjacency_centered = SparseLR(adjacency, (minus_ones, col_means))

        # A solver given by name is replaced by the solver object, stored on the instance.
        if isinstance(self.solver, str):
            self.solver = set_svd_solver(self.solver, adjacency)

        svd = self.solver
        svd.fit(adjacency_centered, self.n_components)

        left = svd.singular_vectors_left_
        right = svd.singular_vectors_right_

        # Rows are embedded by the left singular vectors, columns by the right ones.
        self.embedding_row_ = left
        self.embedding_col_ = right
        self.embedding_ = left
        self.singular_values_ = svd.singular_values_
        self.singular_vectors_left_ = left
        self.singular_vectors_right_ = right

        return self
Esempio n. 4
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Closeness':
        """Compute the closeness centrality of the nodes of a connected graph.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Closeness`

        Raises
        ------
        ValueError
            If ``self.method`` is neither ``'exact'`` nor ``'approximate'``.
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        check_connected(adjacency)
        n = adjacency.shape[0]

        method = self.method
        if method == 'exact':
            # Every node is a source.
            sources = np.arange(n)
            n_sources = n
        elif method == 'approximate':
            # Random subset of sources, whose size depends on the tolerance (at most n).
            n_sources = min(int(log(n) / self.tol**2), n)
            sources = np.random.choice(np.arange(n), n_sources, replace=False)
        else:
            raise ValueError(
                "Method should be either 'exact' or 'approximate'.")

        dists = distance(adjacency, n_jobs=self.n_jobs, sources=sources)

        # Sum of the distances over the sources, for each node.
        total_dists = dists.T.dot(np.ones(n_sources))
        scale = (n - 1) * n_sources / n
        self.scores_ = scale / total_dists

        return self
Esempio n. 5
0
def connected_components(adjacency: sparse.csr_matrix,
                         connection: str = 'weak') -> np.ndarray:
    """Label each node with the connected component it belongs to.

    * Graphs
    * Digraphs

    Thin wrapper around SciPy (scipy.sparse.csgraph.connected_components).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    connection :
        Type of connection used for directed graphs: ``'weak'`` (default) or ``'strong'``.

    Returns
    -------
    labels : np.ndarray
        Connected component of each node.

    Raises
    ------
    ValueError
        If the adjacency matrix stores no entry (graph without edge).
    """
    adjacency = check_format(adjacency)
    if not len(adjacency.data):
        raise ValueError('The graph is empty (no edge).')

    # The graph is handled as directed whenever its adjacency matrix is not symmetric.
    directed = not is_symmetric(adjacency)
    _, labels = sparse.csgraph.connected_components(adjacency,
                                                    directed=directed,
                                                    connection=connection,
                                                    return_labels=True)
    return labels
Esempio n. 6
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray, LinearOperator],
            seeds: Optional[Union[dict, np.ndarray]] = None) -> 'PageRank':
        """Compute the PageRank scores of the nodes.

        Parameters
        ----------
        adjacency :
            Adjacency matrix.
        seeds :
            Parameter of Personalized PageRank:
            restart distribution as a vector or a dict (node: weight).
            If ``None``, the uniform distribution is used (no personalization, default).

        Returns
        -------
        self: :class:`PageRank`
        """
        # Linear operators are used as they are; any other input goes through the format check.
        is_operator = isinstance(adjacency, LinearOperator)
        if not is_operator:
            adjacency = check_format(adjacency)
        check_square(adjacency)

        n_nodes = adjacency.shape[0]
        restart_probs = seeds2probs(n_nodes, seeds)
        self.scores_ = get_pagerank(adjacency, restart_probs,
                                    damping_factor=self.damping_factor,
                                    n_iter=self.n_iter,
                                    solver=self.solver,
                                    tol=self.tol)

        return self
Esempio n. 7
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None, seeds_col: Optional[Union[dict, np.ndarray]] = None) \
            -> 'BiPageRank':
        """Compute PageRank scores of rows and columns on the undirected graph built from the biadjacency matrix.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix.
        seeds_row :
            Seed rows, given as a dict or as a vector.
        seeds_col :
            Seed columns, given as a dict or as a vector.
            If both seeds_row and seeds_col are ``None``, the uniform distribution is used.

        Returns
        -------
        self: :class:`BiPageRank`
        """
        biadjacency = check_format(biadjacency)
        n_row, n_col = biadjacency.shape

        # PageRank on the undirected graph, with row and column seeds stacked into one vector.
        PageRank.fit(self,
                     bipartite2undirected(biadjacency),
                     stack_seeds(n_row, n_col, seeds_row, seeds_col))
        self._split_vars(n_row)

        # Rescale rows and columns separately (in place) so that each set of scores sums to 1.
        row_total = self.scores_row_.sum()
        self.scores_row_ /= row_total
        col_total = self.scores_col_.sum()
        self.scores_col_ /= col_total

        # The default scores are those of the rows.
        self.scores_ = self.scores_row_

        return self
Esempio n. 8
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None, seeds_col: Optional[Union[dict, np.ndarray]] = None,
            initial_state: Optional = None) -> 'BiDiffusion':
        """Compute the diffusion (temperature at equilibrium) on a bipartite graph.

        The biadjacency matrix is turned into the adjacency matrix of an undirected graph and
        the computation is delegated to ``Diffusion.fit``.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix, shape (n_row, n_col).
        seeds_row :
            Temperatures of row border nodes (dictionary or vector of size n_row). Negative temperatures ignored.
        seeds_col :
            Temperatures of column border nodes (dictionary or vector of size n_col). Negative temperatures ignored.
        initial_state :
            Initial state of temperatures, forwarded unchanged to ``Diffusion.fit``.
            It applies to the undirected graph, so it presumably covers rows then columns
            (size n_row + n_col) -- TODO confirm.

        Returns
        -------
        self: :class:`BiDiffusion`
        """
        biadjacency = check_format(biadjacency)
        n_row, n_col = biadjacency.shape
        seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)

        adjacency = bipartite2undirected(biadjacency)
        # Forward the initial state: it used to be accepted by this method but silently dropped.
        Diffusion.fit(self, adjacency, seeds, initial_state=initial_state)
        self._split_vars(n_row)

        return self
Esempio n. 9
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None, seeds_col: Optional[Union[dict, np.ndarray]] = None) \
            -> 'BiPageRank':
        """Compute PageRank scores of rows and columns of a bipartite graph.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix.
        seeds_row, seeds_col :
            Parameters of Personalized BiPageRank:
            restart distributions as vectors or dicts on rows and columns (node: weight).
            If both seeds_row and seeds_col are ``None`` (default), the uniform distribution on rows is used.

        Returns
        -------
        self: :class:`BiPageRank`
        """
        biadjacency = check_format(biadjacency)
        n_row, n_col = biadjacency.shape

        # Undirected graph over rows and columns, with the seeds stacked accordingly.
        undirected = bipartite2undirected(biadjacency)
        stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
        PageRank.fit(self, undirected, stacked_seeds)
        self._split_vars(n_row)

        # In-place rescaling: scores of rows sum to 1, and so do scores of columns.
        row_total = self.scores_row_.sum()
        self.scores_row_ /= row_total
        col_total = self.scores_col_.sum()
        self.scores_col_ /= col_total

        # The default scores are those of the rows.
        self.scores_ = self.scores_row_

        return self
Esempio n. 10
0
    def fit(
        self, biadjacency: Union[sparse.csr_matrix,
                                 np.ndarray]) -> 'BiLouvainHierarchy':
        """Fit the Louvain hierarchy on the undirected graph of adjacency matrix

        :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`

        where :math:`B` is the biadjacency matrix of the graph.

        Parameters
        ----------
        biadjacency:
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiLouvainHierarchy`
        """
        biadjacency = check_format(biadjacency)
        shape = biadjacency.shape

        # The hierarchy is computed on rows and columns together.
        undirected = bipartite2undirected(biadjacency)
        self.dendrogram_ = self.louvain_hierarchy.fit_transform(undirected)

        # The shape of the biadjacency matrix is used to split the outputs.
        self._split_vars(shape)

        return self
Esempio n. 11
0
    def fit(self,
            biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None,
            init: Optional[float] = None) -> 'BiDirichlet':
        """Solve the Dirichlet problem (temperatures at equilibrium) on a bipartite graph.

        The computation is delegated to ``Dirichlet.fit`` on the undirected graph built
        from the biadjacency matrix.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix, shape (n_row, n_col).
        seeds_row :
            Temperatures of seed rows (dictionary or vector of size n_row). Negative temperatures ignored.
        seeds_col :
            Temperatures of seed columns (dictionary or vector of size n_col). Negative temperatures ignored.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`BiDirichlet`
        """
        biadjacency = check_format(biadjacency)
        n_row, n_col = biadjacency.shape

        # Seeds of rows and columns are merged into a single vector before solving.
        stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
        undirected = bipartite2undirected(biadjacency)
        Dirichlet.fit(self, undirected, stacked_seeds, init)

        self._split_vars(n_row)

        return self
Esempio n. 12
0
def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                  force_bipartite: bool = False, force_directed: bool = False)\
        -> Tuple[sparse.csr_matrix, bool]:
    """Check the input matrix and return a proper adjacency matrix.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``.
    force_directed :
        If ``True`` return :math:`A  = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.

    Returns
    -------
    adjacency :
        The checked input matrix, or the adjacency matrix of the bipartite graph built from it.
    bipartite :
        ``True`` if the input matrix was treated as a biadjacency matrix.
    """
    input_matrix = check_format(input_matrix)

    # Bipartite if requested, if the matrix is not square, or if it is not symmetric
    # while directed graphs are not allowed.
    treat_as_bipartite = (force_bipartite
                          or not is_square(input_matrix)
                          or (not allow_directed and not is_symmetric(input_matrix)))
    if not treat_as_bipartite:
        return input_matrix, False

    to_adjacency = bipartite2directed if force_directed else bipartite2undirected
    return to_adjacency(input_matrix), True
Esempio n. 13
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Harmonic':
        """Compute the harmonic centrality of the nodes (sum of inverse path lengths).

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Harmonic`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n_nodes = adjacency.shape[0]

        paths = shortest_path(adjacency, n_jobs=self.n_jobs,
                              indices=np.arange(n_nodes))

        # Put 1 on the diagonal before inverting, then cancel the diagonal of the inverse:
        # a node does not contribute to its own score.
        np.fill_diagonal(paths, 1)
        inverse_paths = 1 / paths
        np.fill_diagonal(inverse_paths, 0)

        # Row sums of the inverse path lengths.
        self.scores_ = inverse_paths.dot(np.ones(n_nodes))

        return self
Esempio n. 14
0
    def fit(
        self,
        biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Union[np.ndarray, dict],
        seeds_col: Optional[Union[np.ndarray,
                                  dict]] = None) -> 'BiPropagation':
        """Node classification by label propagation on a bipartite graph.

        The biadjacency matrix is turned into the adjacency matrix of an undirected graph and
        the computation is delegated to ``Propagation.fit``.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix of the graph.
        seeds_row :
            Seed rows. Can be a dict {node: label} or an array where "-1" means no label.
        seeds_col :
            Seed columns (optional). Same format.

        Returns
        -------
        self: :class:`BiPropagation`
        """
        # Validate first, so that the shape is read from the checked matrix
        # (same order as the other bipartite ``fit`` methods).
        biadjacency = check_format(biadjacency)
        n_row, n_col = biadjacency.shape
        adjacency = bipartite2undirected(biadjacency)
        # Labels are integers; the stacked vector covers rows and columns.
        seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col).astype(int)

        Propagation.fit(self, adjacency, seeds)
        self._split_vars(n_row)

        return self
Esempio n. 15
0
    def fit(
        self, biadjacency: Union[sparse.csr_matrix, np.ndarray]
    ) -> 'BiPropagationClustering':
        """Cluster rows and columns of a bipartite graph with ``PropagationClustering``.

        The clustering is run on the undirected graph built from the biadjacency matrix.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiPropagationClustering`
        """
        # Validate first, so that the shape is read from the checked matrix
        # (same order as the other bipartite ``fit`` methods).
        biadjacency = check_format(biadjacency)
        n_row = biadjacency.shape[0]
        adjacency = bipartite2undirected(biadjacency)

        propagation = PropagationClustering(self.n_iter, self.node_order,
                                            self.weighted)

        # Labels of rows and columns together, split afterwards at n_row.
        self.labels_ = propagation.fit_transform(adjacency)
        self._split_vars(n_row)
        self._secondary_outputs(biadjacency)

        return self
Esempio n. 16
0
    def fit(
        self, adjacency: Union[sparse.csr_matrix,
                               np.ndarray]) -> 'LaplacianEmbedding':
        """Fit the embedding from eigenvectors of the Laplacian of the graph.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`LaplacianEmbedding`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n_nodes = adjacency.shape[0]

        # Regularization is active only for a value that is neither None nor zero.
        regularize: bool = bool(self.regularization is not None
                                and self.regularization != 0.)
        check_scaling(self.scaling, adjacency, regularize)

        # With regularization the Laplacian is an operator, solved with Lanczos.
        solver: EigSolver = LanczosEig() if regularize else set_solver(self.solver, adjacency)
        # One extra component is requested: the first eigenpair is discarded below.
        n_components = 1 + check_n_components(self.n_components, n_nodes - 2)

        degrees = adjacency.dot(np.ones(n_nodes))
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                # Relative value: scaled by the total weight over n^2.
                regularization = regularization * degrees.sum() / n_nodes**2
            degrees += regularization * n_nodes
            laplacian = LaplacianOperator(adjacency, regularization)
        else:
            # Plain Laplacian: diagonal of weights minus the adjacency matrix.
            laplacian = sparse.diags(degrees, format='csr') - adjacency

        solver.which = 'SM'
        solver.fit(matrix=laplacian, n_components=n_components)
        eigenvalues = solver.eigenvalues_[1:]
        eigenvectors = solver.eigenvectors_[:, 1:]

        embedding = eigenvectors.copy()

        if self.scaling:
            # Each coordinate is rescaled by the pseudo-inverse of eigenvalue ** scaling.
            inverse_scale = diag_pinv(eigenvalues**self.scaling)
            embedding = inverse_scale.dot(embedding.T).T

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self
Esempio n. 17
0
def dasgupta_cost(adjacency: sparse.csr_matrix,
                  dendrogram: np.ndarray,
                  weights: str = 'uniform',
                  normalized: bool = False) -> float:
    """Dasgupta's cost of a hierarchy.

    Expected size (weights = ``'uniform'``) or expected volume (weights = ``'degree'``) of the cluster induced by
    random edge sampling (closest ancestor of the two nodes in the hierarchy).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).
    normalized :
        If ``True``, normalized cost (between 0 and 1).

    Returns
    -------
    cost : float
        Cost.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_cost, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> cost = dasgupta_cost(adjacency, dendrogram)
    >>> np.round(cost, 2)
    3.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)

    n_nodes = adjacency.shape[0]
    check_min_size(n_nodes, 2)

    edge_sampling, _, cluster_weight = get_sampling_distributions(
        adjacency, dendrogram, weights)
    cost = edge_sampling.dot(cluster_weight)

    if normalized:
        return cost

    # Unnormalized cost: scale by the total weight of the graph or by the number of nodes.
    scale = adjacency.data.sum() if weights == 'degree' else n_nodes
    return cost * scale
Esempio n. 18
0
def co_neighbor_graph(adjacency: Union[sparse.csr_matrix, np.ndarray],
                      normalized: bool = True,
                      method='knn',
                      n_neighbors: int = 5,
                      n_components: int = 8) -> sparse.csr_matrix:
    """Compute the co-neighborhood adjacency.

    * Graphs
    * Digraphs
    * Bigraphs

    :math:`\\tilde{A} = AF^{-1}A^T`,

    where F is a weight matrix.

    Parameters
    ----------
    adjacency:
        Adjacency of the input graph.
    normalized:
        If ``True``, F is the diagonal in-degree matrix :math:`F = \\text{diag}(A^T1)`.
        Otherwise, F is the identity matrix.
    method:
        Either ``'exact'`` or ``'knn'``. If 'exact' the output is computed with matrix multiplication.
        However, the density can be much higher than in the input graph and this can trigger Memory errors.
        If ``'knn'``, the co-neighborhood is approximated through KNNDense-search in an appropriate spectral embedding
        space.
    n_neighbors:
        Number of neighbors for the KNNDense search. Only useful if ``method='knn'``.
    n_components:
        Dimension of the embedding space. Only useful if ``method='knn'``.

    Returns
    -------
    adjacency : sparse.csr_matrix
        Adjacency of the co-neighborhood.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'exact'`` nor ``'knn'``.
    """
    adjacency = check_format(adjacency).astype(float)

    if method == 'exact':
        # Explicit product of the matrix with its (possibly normalized) transpose.
        forward = normalize(adjacency.T).tocsr() if normalized else adjacency.T
        return adjacency.dot(forward)

    if method == 'knn':
        # Nearest neighbors in a spectral embedding: GSVD if normalized, SVD otherwise.
        embedding_class = GSVD if normalized else SVD
        algo = embedding_class(n_components, regularization=None)
        embedding = algo.fit_transform(adjacency)
        knn = KNNDense(n_neighbors, undirected=True)
        knn.fit(embedding)
        return knn.adjacency_

    raise ValueError('method must be "exact" or "knn".')
Esempio n. 19
0
def largest_connected_component(adjacency: Union[sparse.csr_matrix,
                                                 np.ndarray],
                                return_labels: bool = False):
    """Extract the largest connected component of a graph. Bipartite graphs are treated as undirected.

    * Graphs
    * Digraphs
    * Bigraphs

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.
    return_labels : bool
        Whether to return the indices of the new nodes in the original graph.

    Returns
    -------
    new_adjacency : sparse.csr_matrix
        Adjacency or biadjacency matrix of the largest connected component.
    indices : array or tuple of array
        Indices of the nodes in the original graph (only if ``return_labels`` is ``True``).
        For biadjacency matrices, ``indices[0]`` corresponds to the rows and ``indices[1]`` to the columns.

    """
    adjacency = check_format(adjacency)
    n_row = adjacency.shape[0]

    # A non-square matrix is a biadjacency matrix: rows and columns become the nodes
    # of one undirected graph, rows first.
    bipartite: bool = not is_square(adjacency)
    if bipartite:
        full_adjacency = sparse.bmat([[None, adjacency], [adjacency.T, None]],
                                     format='csr')
    else:
        full_adjacency = adjacency

    # Nodes carrying the most frequent component label.
    labels = connected_components(full_adjacency)
    unique_labels, counts = np.unique(labels, return_counts=True)
    largest_label = unique_labels[np.argmax(counts)]
    members = np.flatnonzero(labels == largest_label)

    if bipartite:
        # Members are sorted: those below n_row are rows, the others are columns shifted by n_row.
        split_ix = np.searchsorted(members, n_row)
        row_ix = members[:split_ix]
        col_ix = members[split_ix:] - n_row
    else:
        row_ix = col_ix = members

    # Row selection in CSR format, column selection in CSC format.
    new_adjacency = adjacency[row_ix, :].tocsc()[:, col_ix].tocsr()

    if not return_labels:
        return new_adjacency
    if bipartite:
        return new_adjacency, (row_ix, col_ix)
    return new_adjacency, row_ix
Esempio n. 20
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict] = None) \
            -> 'Propagation':
        """Node classification by label propagation.

        Nodes without label repeatedly take a label voted by their neighbors (see ``vote_update``),
        until the labels of these nodes stop changing or ``self.n_iter`` rounds are done.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency = check_format(adjacency)
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instanciate_vars(
            adjacency, seeds)

        if self.node_order == 'random':
            np.random.shuffle(index_remain)
        elif self.node_order in ('decreasing', 'increasing'):
            # Sort the remaining nodes themselves by weight (column sums of the adjacency matrix).
            # Indexing a global ordering with ``index_remain`` (previous code) selected other
            # nodes, possibly seeds, instead of reordering the nodes to update.
            index_remain = np.asarray(index_remain)
            sort_key = adjacency.T.dot(np.ones(n))[index_remain]
            if self.node_order == 'decreasing':
                sort_key = -sort_key
            index_remain = index_remain[np.argsort(sort_key)].astype(np.int32)

        labels = -np.ones(n, dtype=np.int32)
        labels[index_seed] = labels_seed
        labels_remain = np.zeros_like(index_remain, dtype=np.int32)

        # 32-bit copies of the CSR arrays, as passed to ``vote_update``.
        indptr = adjacency.indptr.astype(np.int32)
        indices = adjacency.indices.astype(np.int32)
        if self.weighted:
            data = adjacency.data.astype(np.float32)
        else:
            # NOTE(review): unit weights have length n (number of nodes) whereas the weighted
            # branch has one value per stored entry -- confirm how vote_update indexes ``data``.
            data = np.ones(n, dtype=np.float32)

        # Stop after n_iter rounds, or as soon as a round leaves the labels unchanged.
        t = 0
        while t < self.n_iter and not np.array_equal(labels_remain,
                                                     labels[index_remain]):
            t += 1
            labels_remain = labels[index_remain].copy()
            labels = np.asarray(
                vote_update(indptr, indices, data, labels, index_remain))

        # Membership of each node: normalized counts of the labels of its neighbors.
        membership = membership_matrix(labels)
        membership = normalize(adjacency.dot(membership))

        self.labels_ = labels
        self.membership_ = membership

        return self
Esempio n. 21
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None, initial_state: Optional = None) -> 'Diffusion':
        """Compute the diffusion (temperature at equilibrium).

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Temperatures of border nodes (dictionary or vector). Negative temperatures ignored.
            If ``None``, all scores are set to 1 / n.
        initial_state :
            Initial state of temperatures.

        Returns
        -------
        self: :class:`Diffusion`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n: int = adjacency.shape[0]

        # Without seeds there is nothing to diffuse: uniform scores.
        if seeds is None:
            self.scores_ = np.ones(n) / n
            return self

        seeds = check_seeds(seeds, n)
        b, border = limit_conditions(seeds)
        tmin, tmax = np.min(b[border]), np.max(b)
        distinct_temperatures = tmin != tmax

        # Rows of border nodes are cancelled: only interior nodes average their neighbors.
        interior: sparse.csr_matrix = sparse.diags(~border, shape=(n, n), format='csr', dtype=float)
        diffusion_matrix = interior.dot(normalize(adjacency))

        if initial_state is None:
            # Default start: mean border temperature, or zero if the border temperatures do not differ.
            initial_state = b[border].mean() * np.ones(n) if distinct_temperatures else np.zeros(n)
            initial_state[border] = b[border]

        if self.n_iter > 0:
            # Fixed number of iterations, border temperatures restored after each step.
            scores = initial_state
            for _ in range(self.n_iter):
                scores = diffusion_matrix.dot(scores)
                scores[border] = b[border]
        else:
            # Solve the linear system with BiCGSTAB instead of iterating.
            system = sparse.eye(n, format='csr', dtype=float) - diffusion_matrix
            scores, info = bicgstab(system, b, atol=0., x0=initial_state)
            self._scipy_solver_info(info)

        # Scores are clipped to the range of temperatures when this range is not reduced to a point.
        self.scores_ = np.clip(scores, tmin, tmax) if distinct_temperatures else scores
        return self
Esempio n. 22
0
    def __init__(self, adjacency: Union[sparse.csr_matrix, np.ndarray], normalized: bool = True):
        """Build the operator from the adjacency matrix.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.
        normalized :
            If ``True`` (default), the transposed matrix is normalized before being stored as ``forward``.
        """
        adjacency = check_format(adjacency).astype(float)
        n_row = adjacency.shape[0]
        # Square operator acting on the rows of the input matrix.
        super(CoNeighborsOperator, self).__init__(dtype=float, shape=(n_row, n_row))

        transposed = adjacency.T
        self.forward = normalize(transposed).tocsr() if normalized else transposed
        self.backward = adjacency
Esempio n. 23
0
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'KMeans':
        """Apply embedding method followed by K-means.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`KMeans`
        """
        self._init_vars()

        # Keep the validated matrix (the result used to be discarded) so that the embedding
        # and the secondary outputs work on the checked input, not on the raw argument.
        input_matrix = check_format(input_matrix)

        # The number of clusters is checked against the number of nodes to cluster:
        # rows and columns in co-clustering mode, rows only otherwise.
        if self.co_cluster:
            check_n_clusters(self.n_clusters, np.sum(input_matrix.shape))
        else:
            check_n_clusters(self.n_clusters, input_matrix.shape[0])

        # Embedding of the nodes; the second returned value is stored as ``self.bipartite``.
        embedding, self.bipartite = get_embedding(input_matrix, self.embedding_method, self.co_cluster)

        # K-means in the embedding space.
        kmeans = KMeansDense(self.n_clusters)
        kmeans.fit(embedding)

        # Optional reindexing of the labels.
        if self.sort_clusters:
            labels = reindex_labels(kmeans.labels_)
        else:
            labels = kmeans.labels_

        # Outputs.
        self.labels_ = labels
        if self.co_cluster:
            self._split_vars(input_matrix.shape)
        self._secondary_outputs(input_matrix)

        return self
Esempio n. 24
0
    def __init__(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
                 coeffs: np.ndarray):
        """Build the operator from a square adjacency matrix and polynomial coefficients.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph (square).
        coeffs :
            Coefficients of the polynome; at least one is required.

        Raises
        ------
        ValueError
            If ``coeffs`` is empty.
        """
        n_coeffs = coeffs.shape[0]
        if n_coeffs == 0:
            raise ValueError('A polynome requires at least one coefficient.')

        adjacency = check_format(adjacency)
        check_square(adjacency)
        # The operator has the shape and the dtype of the adjacency matrix.
        super(Polynome, self).__init__(dtype=adjacency.dtype, shape=adjacency.shape)

        self.adjacency = adjacency
        self.coeffs = coeffs
Esempio n. 25
0
    def fit(self,
            adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None,
            init: Optional[float] = None) -> 'Dirichlet':
        """Solve the Dirichlet problem (temperatures at equilibrium).

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Temperatures of seed nodes (dictionary or vector). Negative temperatures ignored.
            If ``None``, all scores are set to 1 / n.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`Dirichlet`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n: int = adjacency.shape[0]

        # Without seeds there is no boundary condition: uniform scores.
        if seeds is None:
            self.scores_ = np.ones(n) / n
            return self

        seeds = check_seeds(seeds, n)
        # Seed nodes are those with a non-negative temperature.
        border = (seeds >= 0)

        # Initial state: constant temperature everywhere, seed temperatures on the border.
        start = seeds[border].mean() if init is None else init
        scores = start * np.ones(n)
        scores[border] = seeds[border]

        if self.n_iter > 0:
            # Fixed number of iterations, seed temperatures restored after each step.
            diffusion = DirichletOperator(adjacency, self.damping_factor,
                                          border)
            for _ in range(self.n_iter):
                scores = diffusion.dot(scores)
                scores[border] = seeds[border]
        else:
            # Solve the linear system with BiCGSTAB; the right-hand side is null outside the border.
            operator = DeltaDirichletOperator(adjacency, self.damping_factor, border)
            rhs = -seeds
            rhs[~border] = 0
            scores, info = bicgstab(operator, rhs, atol=0., x0=scores)
            self._scipy_solver_info(info)

        # Scores are clipped to the range of seed temperatures.
        tmin, tmax = seeds[border].min(), seeds[border].max()
        self.scores_ = np.clip(scores, tmin, tmax)

        return self
Esempio n. 26
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) \
            -> 'Propagation':
        """Node classification by label propagation.

        Each node without label repeatedly takes the most frequent label among its labeled
        neighbors, until labels stop changing or ``self.n_iter`` rounds are done.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.
        seeds :
            Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency = check_format(adjacency)
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instanciate_vars(
            adjacency, seeds)

        # -1 stands for "no label yet".
        labels = -np.ones(n, dtype=int)
        labels[index_seed] = labels_seed
        previous = np.zeros_like(index_remain, dtype=int)

        n_rounds = 0
        while n_rounds < self.n_iter and not np.array_equal(previous,
                                                            labels[index_remain]):
            n_rounds += 1
            previous = labels[index_remain].copy()
            for node in index_remain:
                # Labels of the neighbors of the node, read from the CSR structure.
                start, stop = adjacency.indptr[node], adjacency.indptr[node + 1]
                neighbor_labels = labels[adjacency.indices[start:stop]]
                known = neighbor_labels[neighbor_labels >= 0]
                if not len(known):
                    continue
                # Most frequent label among labeled neighbors.
                candidates, counts = np.unique(known, return_counts=True)
                labels[node] = candidates[np.argmax(counts)]

        # Membership of each node: normalized counts of the labels of its neighbors.
        membership = normalize(adjacency.dot(membership_matrix(labels)))

        self.labels_ = labels
        self.membership_ = membership

        return self
Esempio n. 27
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]):
        """Store the sparsity structure of the graph.

        The matrix is passed through ``check_format``, its column indices are sorted
        in place within each row, and its index pointer and index arrays are kept
        as 32-bit integer arrays.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph

        Returns
        -------
        self : :class:`FirstOrder`
        """
        matrix = check_format(adjacency)
        # Sorting is done in place on the checked matrix.
        matrix.sort_indices()

        row_pointers = matrix.indptr
        column_indices = matrix.indices
        self.indptr_ = row_pointers.astype(np.int32)
        self.indices_ = column_indices.astype(np.int32)

        return self
Esempio n. 28
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiRandomProjection':
        """Compute the embedding of a bipartite graph.

        The biadjacency matrix is turned into the adjacency matrix of an undirected
        graph with ``bipartite2undirected``, the parent ``RandomProjection.fit`` is
        run on it, and the result is split using the number of rows.

        Parameters
        ----------
        biadjacency:
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiRandomProjection`
        """
        biadjacency = check_format(biadjacency)
        n_row, _n_col = biadjacency.shape

        undirected = bipartite2undirected(biadjacency)
        RandomProjection.fit(self, undirected)

        # Separate the fitted variables using the row count of the biadjacency matrix.
        self._split_vars(n_row)

        return self
Esempio n. 29
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'HITS':
        """Compute hub and authority scores with a spectral (SVD) method.

        If ``solver`` is ``'auto'``, it is replaced by a :class:`LanczosSVD` or
        :class:`HalkoSVD` instance, chosen by ``auto_solver`` from the number of
        stored entries of the matrix. Hubs come from the left singular vectors of the
        fitted solver and authorities from the right ones. Each vector is oriented so
        that the majority sign becomes positive, and the remaining negative entries
        are set to 0.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`HITS`
        """

        def _majority_nonnegative(vector: np.ndarray) -> np.ndarray:
            # Flip the vector unless strictly more entries are positive than negative,
            # then zero out whatever is still negative.
            n_positive = (vector > 0).sum()
            n_negative = (vector < 0).sum()
            oriented = vector if n_positive > n_negative else -vector
            return np.clip(oriented, a_min=0., a_max=None)

        adjacency = check_format(adjacency)

        if self.solver == 'auto':
            use_lanczos = auto_solver(adjacency.nnz) == 'lanczos'
            self.solver: SVDSolver = LanczosSVD() if use_lanczos else HalkoSVD()

        self.solver.fit(adjacency, 1)
        left: np.ndarray = self.solver.singular_vectors_left_.reshape(-1)
        right: np.ndarray = self.solver.singular_vectors_right_.reshape(-1)

        hubs = _majority_nonnegative(left)
        authorities = _majority_nonnegative(right)

        self.scores_row_ = hubs
        self.scores_col_ = authorities
        # The default scores are the hub scores (same array as ``scores_row_``).
        self.scores_ = hubs

        return self
Esempio n. 30
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiKatz':
        """Katz centrality for a bipartite graph.

        The biadjacency matrix is converted with ``bipartite2undirected``, the parent
        ``Katz.fit`` is run on the resulting adjacency matrix, and the result is split
        using the number of rows.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiKatz`
        """
        biadjacency = check_format(biadjacency)
        # Only the row count is needed afterwards.
        n_row, _ = biadjacency.shape

        Katz.fit(self, bipartite2undirected(biadjacency))

        self._split_vars(n_row)
        return self