Python check_square Beispiele, sknetwork.utils.check.check_square Python Beispiele

Beispiel #1

0

Datei anzeigen

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray, LinearOperator],
            seeds: Optional[Union[dict, np.ndarray]] = None) -> 'PageRank':
        """Fit algorithm to data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix.
        seeds :
            Parameter to be used for Personalized PageRank.
            Restart distribution as a vector or a dict (node: weight).
            If ``None``, the uniform distribution is used (no personalization, default).

        Returns
        -------
        self: :class:`PageRank`
        """
        if not isinstance(adjacency, LinearOperator):
            adjacency = check_format(adjacency)
        check_square(adjacency)
        seeds = seeds2probs(adjacency.shape[0], seeds)
        self.scores_ = get_pagerank(adjacency, seeds, damping_factor=self.damping_factor, n_iter=self.n_iter,
                                    solver=self.solver, tol=self.tol)

        return self

Beispiel #2

0

Datei anzeigen

    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Harmonic':
        """Harmonic centrality for connected graphs.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Harmonic`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n = adjacency.shape[0]
        indices = np.arange(n)

        paths = shortest_path(adjacency, n_jobs=self.n_jobs, indices=indices)

        np.fill_diagonal(paths, 1)
        inv = (1 / paths)
        np.fill_diagonal(inv, 0)

        self.scores_ = inv.dot(np.ones(n))

        return self

Beispiel #3

0

Datei anzeigen

    def fit(
        self, adjacency: Union[sparse.csr_matrix,
                               np.ndarray]) -> 'LaplacianEmbedding':
        """Compute the graph embedding.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`LaplacianEmbedding`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n = adjacency.shape[0]

        regularize: bool = not (self.regularization is None
                                or self.regularization == 0.)
        check_scaling(self.scaling, adjacency, regularize)

        if regularize:
            solver: EigSolver = LanczosEig()
        else:
            solver = set_solver(self.solver, adjacency)
        n_components = 1 + check_n_components(self.n_components, n - 2)

        weights = adjacency.dot(np.ones(n))
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * weights.sum() / n**2
            weights += regularization * n
            laplacian = LaplacianOperator(adjacency, regularization)
        else:
            weight_diag = sparse.diags(weights, format='csr')
            laplacian = weight_diag - adjacency

        solver.which = 'SM'
        solver.fit(matrix=laplacian, n_components=n_components)
        eigenvalues = solver.eigenvalues_[1:]
        eigenvectors = solver.eigenvectors_[:, 1:]

        embedding = eigenvectors.copy()

        if self.scaling:
            eigenvalues_inv_diag = diag_pinv(eigenvalues**self.scaling)
            embedding = eigenvalues_inv_diag.dot(embedding.T).T

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self

Beispiel #4

0

Datei anzeigen

Datei: closeness.py Projekt: sknetwork-team/scikit-network

    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Closeness':
        """Closeness centrality for connected graphs.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Closeness`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        check_connected(adjacency)
        n = adjacency.shape[0]

        if self.method == 'exact':
            n_sources = n
            sources = np.arange(n)
        elif self.method == 'approximate':
            n_sources = min(int(log(n) / self.tol**2), n)
            sources = np.random.choice(np.arange(n), n_sources, replace=False)
        else:
            raise ValueError(
                "Method should be either 'exact' or 'approximate'.")

        dists = distance(adjacency, n_jobs=self.n_jobs, sources=sources)

        self.scores_ = (
            (n - 1) * n_sources / n) / dists.T.dot(np.ones(n_sources))

        return self

Beispiel #5

0

Datei anzeigen

Datei: metrics.py Projekt: sknetwork-team/scikit-network

def dasgupta_cost(adjacency: sparse.csr_matrix,
                  dendrogram: np.ndarray,
                  weights: str = 'uniform',
                  normalized: bool = False) -> float:
    """Dasgupta's cost of a hierarchy.

    Expected size (weights = ``'uniform'``) or expected volume (weights = ``'degree'``) of the cluster induced by
    random edge sampling (closest ancestor of the two nodes in the hierarchy).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).
    normalized :
        If ``True``, normalized cost (between 0 and 1).

    Returns
    -------
    cost : float
        Cost.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_score, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> cost = dasgupta_cost(adjacency, dendrogram)
    >>> np.round(cost, 2)
    3.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)

    n = adjacency.shape[0]
    check_min_size(n, 2)

    edge_sampling, _, cluster_weight = get_sampling_distributions(
        adjacency, dendrogram, weights)
    cost = edge_sampling.dot(cluster_weight)

    if not normalized:
        if weights == 'degree':
            cost *= adjacency.data.sum()
        else:
            cost *= n

    return cost

Beispiel #6

0

Datei anzeigen

Datei: diffusion.py Projekt: zhongkailv/scikit-network

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None, initial_state: Optional = None) -> 'Diffusion':
        """Compute the diffusion (temperature at equilibrium).

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Temperatures of border nodes (dictionary or vector). Negative temperatures ignored.
        initial_state :
            Initial state of temperatures.

        Returns
        -------
        self: :class:`Diffusion`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n: int = adjacency.shape[0]
        if seeds is None:
            self.scores_ = np.ones(n) / n
            return self

        seeds = check_seeds(seeds, n)
        b, border = limit_conditions(seeds)
        tmin, tmax = np.min(b[border]), np.max(b)

        interior: sparse.csr_matrix = sparse.diags(~border, shape=(n, n), format='csr', dtype=float)
        diffusion_matrix = interior.dot(normalize(adjacency))

        if initial_state is None:
            if tmin != tmax:
                initial_state = b[border].mean() * np.ones(n)
            else:
                initial_state = np.zeros(n)
            initial_state[border] = b[border]

        if self.n_iter > 0:
            scores = initial_state
            for i in range(self.n_iter):
                scores = diffusion_matrix.dot(scores)
                scores[border] = b[border]

        else:
            a = sparse.eye(n, format='csr', dtype=float) - diffusion_matrix
            scores, info = bicgstab(a, b, atol=0., x0=initial_state)
            self._scipy_solver_info(info)

        if tmin != tmax:
            self.scores_ = np.clip(scores, tmin, tmax)
        else:
            self.scores_ = scores
        return self

Beispiel #7

0

Datei anzeigen

    def fit(self,
            adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None,
            init: Optional[float] = None) -> 'Dirichlet':
        """Compute the solution to the Dirichlet problem (temperatures at equilibrium).

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Temperatures of seed nodes (dictionary or vector). Negative temperatures ignored.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`Dirichlet`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n: int = adjacency.shape[0]
        if seeds is None:
            self.scores_ = np.ones(n) / n
            return self

        seeds = check_seeds(seeds, n)
        border = (seeds >= 0)

        if init is None:
            scores = seeds[border].mean() * np.ones(n)
        else:
            scores = init * np.ones(n)
        scores[border] = seeds[border]

        if self.n_iter > 0:
            diffusion = DirichletOperator(adjacency, self.damping_factor,
                                          border)
            for i in range(self.n_iter):
                scores = diffusion.dot(scores)
                scores[border] = seeds[border]
        else:
            a = DeltaDirichletOperator(adjacency, self.damping_factor, border)
            b = -seeds
            b[~border] = 0
            scores, info = bicgstab(a, b, atol=0., x0=scores)
            self._scipy_solver_info(info)

        tmin, tmax = seeds[border].min(), seeds[border].max()
        self.scores_ = np.clip(scores, tmin, tmax)

        return self

Beispiel #8

0

Datei anzeigen

Datei: polynome.py Projekt: zhongkailv/scikit-network

    def __init__(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
                 coeffs: np.ndarray):
        if coeffs.shape[0] == 0:
            raise ValueError('A polynome requires at least one coefficient.')
        adjacency = check_format(adjacency)
        check_square(adjacency)
        shape = adjacency.shape
        dtype = adjacency.dtype
        super(Polynome, self).__init__(dtype=dtype, shape=shape)

        self.adjacency = adjacency
        self.coeffs = coeffs

Beispiel #9

0

Datei anzeigen

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None, init: Optional[float] = None) \
            -> 'Diffusion':
        """Compute the diffusion (temperatures at equilibrium).

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Temperatures of seed nodes in initial state (dictionary or vector). Negative temperatures ignored.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`Diffusion`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n: int = adjacency.shape[0]
        if seeds is None:
            self.scores_ = np.ones(n) / n
            return self

        seeds = check_seeds(seeds, n)
        border = (seeds >= 0)

        if init is None:
            scores = seeds[border].mean() * np.ones(n)
        else:
            scores = init * np.ones(n)
        scores[border] = seeds[border]

        diffusion = DirichletOperator(adjacency, self.damping_factor)
        for i in range(self.n_iter):
            scores = diffusion.dot(scores)

        self.scores_ = scores

        return self

Beispiel #10

0

Datei anzeigen

Datei: random_projection.py Projekt: valeeraZ/scikit-network

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'RandomProjection':
        """Compute the graph embedding.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`RandomProjection`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        n = adjacency.shape[0]

        random_generator = check_random_state(self.random_state)
        random_matrix = random_generator.normal(size=(n, self.n_components))

        # make the matrix orthogonal
        random_matrix, _ = np.linalg.qr(random_matrix)

        factor = random_matrix
        embedding = factor.copy()

        if self.random_walk:
            transition = normalize(adjacency)
        else:
            transition = adjacency

        for t in range(self.n_iter):
            factor = self.alpha * transition.dot(factor)
            embedding += factor

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding

        return self

Beispiel #11

0

Datei anzeigen

Datei: louvain_hierarchy.py Projekt: valeeraZ/scikit-network

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)

        tree = self._recursive_louvain(adjacency, self.depth)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)
        dendrogram[:, 2] -= min(dendrogram[:, 2])

        self.dendrogram_ = reorder_dendrogram(dendrogram)

        return self

Beispiel #12

0

Datei anzeigen

Datei: pagerank.py Projekt: brunoasouza/scikit-network

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray, LinearOperator],
            seeds: Optional[Union[dict, np.ndarray]] = None) -> 'PageRank':
        """Fit algorithm to data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix.
        seeds :
            If ``None``, the uniform distribution is used.
            Otherwise, a non-negative, non-zero vector or a dictionary must be provided.

        Returns
        -------
        self: :class:`PageRank`
        """
        if not isinstance(adjacency, LinearOperator):
            adjacency = check_format(adjacency)
        check_square(adjacency)
        seeds = seeds2probs(adjacency.shape[0], seeds)
        self.scores_ = get_pagerank(adjacency, seeds, damping_factor=self.damping_factor, n_iter=self.n_iter,
                                    solver=self.solver, tol=self.tol)

        return self

Beispiel #13

0

Datei anzeigen

Datei: louvain.py Projekt: zhongkailv/scikit-network

    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Louvain':
        """Fit algorithm to the data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Louvain`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n_nodes = adjacency.shape[0]

        probs_out = check_probs('degree', adjacency)
        probs_in = check_probs('degree', adjacency.T)

        nodes = np.arange(n_nodes)
        if self.shuffle_nodes:
            nodes = self.random_state.permutation(nodes)
            adjacency = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

        adjacency_norm = adjacency / adjacency.data.sum()

        membership = sparse.identity(n_nodes, format='csr')
        increase = True
        count_aggregations = 0
        self.log.print("Starting with", n_nodes, "nodes.")
        while increase:
            count_aggregations += 1

            current_labels, pass_increase = self._optimize(
                n_nodes, adjacency_norm, probs_out, probs_in)
            _, current_labels = np.unique(current_labels, return_inverse=True)

            if pass_increase <= self.tol_aggregation:
                increase = False
            else:
                membership_agg = membership_matrix(current_labels)
                membership = membership.dot(membership_agg)
                n_nodes, adjacency_norm, probs_out, probs_in = self._aggregate(
                    adjacency_norm, probs_out, probs_in, membership_agg)

                if n_nodes == 1:
                    break
            self.log.print("Aggregation", count_aggregations, "completed with",
                           n_nodes, "clusters and ", pass_increase,
                           "increment.")
            if count_aggregations == self.n_aggregations:
                break

        if self.sort_clusters:
            labels = reindex_labels(membership.indices)
        else:
            labels = membership.indices
        if self.shuffle_nodes:
            reverse = np.empty(nodes.size, nodes.dtype)
            reverse[nodes] = np.arange(nodes.size)
            labels = labels[reverse]

        self.labels_ = labels
        self._secondary_outputs(adjacency)

        return self

Beispiel #14

0

Datei anzeigen

Datei: spring.py Projekt: zhongkailv/scikit-network

    def fit(self,
            adjacency: Union[sparse.csr_matrix, np.ndarray],
            position_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'Spring':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        position_init : np.ndarray
            Custom initial positions of the nodes. Shape must be (n, 2).
            If ``None``, use the value of self.pos_init.
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`Spring`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        position = np.zeros((n, 2))
        if position_init is None:
            if self.position_init == 'random':
                position = np.random.randn(n, 2)
            elif self.position_init == 'spectral':
                position = Spectral(n_components=2,
                                    normalized=False).fit_transform(adjacency)
        elif isinstance(position_init, np.ndarray):
            if position_init.shape == (n, 2):
                position = position_init.copy()
            else:
                raise ValueError('Initial position has invalid shape.')
        else:
            raise TypeError('Initial position must be a numpy array.')

        if n_iter is None:
            n_iter = self.n_iter

        if self.strength is None:
            strength = np.sqrt((1 / n))
        else:
            strength = self.strength

        delta_x: float = position[:, 0].max() - position[:, 0].min()
        delta_y: float = position[:, 1].max() - position[:, 1].min()
        step_max: float = 0.1 * max(delta_x, delta_y)
        step: float = step_max / (n_iter + 1)

        delta = np.zeros((n, 2))
        for iteration in range(n_iter):
            delta *= 0
            for i in range(n):
                indices = adjacency.indices[adjacency.indptr[i]:adjacency.
                                            indptr[i + 1]]
                data = adjacency.data[adjacency.indptr[i]:adjacency.indptr[i +
                                                                           1]]

                grad: np.ndarray = (position[i] - position)  # shape (n, 2)
                distance: np.ndarray = np.linalg.norm(grad,
                                                      axis=1)  # shape (n,)
                distance = np.where(distance < 0.01, 0.01, distance)

                attraction = np.zeros(n)
                attraction[indices] += data * distance[indices] / strength

                repulsion = (strength / distance)**2

                delta[i]: np.ndarray = (
                    grad * (repulsion - attraction)[:, np.newaxis]).sum(
                        axis=0)  # shape (2,)
            length = np.linalg.norm(delta, axis=0)
            length = np.where(length < 0.01, 0.1, length)
            delta = delta * step_max / length
            position += delta
            step_max -= step
            err: float = np.linalg.norm(delta) / n
            if err < self.tol:
                break

        self.embedding_ = position
        return self

Beispiel #15

0

Datei anzeigen

    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Louvain':
        """Fit algorithm to the data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Louvain`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n = adjacency.shape[0]

        if self.modularity == 'potts':
            probs_ou = check_probs('uniform', adjacency)
            probs_in = probs_ou.copy()
        elif self.modularity == 'newman':
            probs_ou = check_probs('degree', adjacency)
            probs_in = probs_ou.copy()
        elif self.modularity == 'dugue':
            probs_ou = check_probs('degree', adjacency)
            probs_in = check_probs('degree', adjacency.T)
        else:
            raise ValueError('Unknown modularity function.')

        nodes = np.arange(n, dtype=np.int32)
        if self.shuffle_nodes:
            nodes = self.random_state.permutation(nodes)
            adjacency = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

        adjacency_clust = adjacency / adjacency.data.sum()

        membership = sparse.identity(n, format='csr')
        increase = True
        count_aggregations = 0
        self.log.print("Starting with", n, "nodes.")
        while increase:
            count_aggregations += 1

            labels_clust, pass_increase = self._optimize(
                adjacency_clust, probs_ou, probs_in)
            _, labels_clust = np.unique(labels_clust, return_inverse=True)

            if pass_increase <= self.tol_aggregation:
                increase = False
            else:
                membership_clust = membership_matrix(labels_clust)
                membership = membership.dot(membership_clust)
                adjacency_clust, probs_ou, probs_in = self._aggregate(
                    adjacency_clust, probs_ou, probs_in, membership_clust)

                n = adjacency_clust.shape[0]
                if n == 1:
                    break
            self.log.print("Aggregation", count_aggregations, "completed with",
                           n, "clusters and ", pass_increase, "increment.")
            if count_aggregations == self.n_aggregations:
                break

        if self.sort_clusters:
            labels = reindex_labels(membership.indices)
        else:
            labels = membership.indices
        if self.shuffle_nodes:
            reverse = np.empty(nodes.size, nodes.dtype)
            reverse[nodes] = np.arange(nodes.size)
            labels = labels[reverse]

        self.labels_ = labels
        self._secondary_outputs(adjacency)

        return self

Beispiel #16

0

Datei anzeigen

Datei: metrics.py Projekt: vintasoftware/scikit-network

def dasgupta_cost(adjacency: sparse.csr_matrix,
                  dendrogram: np.ndarray,
                  weights: str = 'uniform',
                  normalized: bool = False) -> float:
    """Dasgupta's cost of a hierarchy.

    * Graphs
    * Digraphs

    Expected size (weights = ``'uniform'``) or expected weight (weights = ``'degree'``) of the cluster induced by
    random edge sampling (closest ancestor of the two nodes in the hierarchy).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).
    normalized :
        If ``True``, normalized cost (between 0 and 1).

    Returns
    -------
    cost : float
        Cost.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_score, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> cost = dasgupta_cost(adjacency, dendrogram)
    >>> np.round(cost, 2)
    3.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)

    n = adjacency.shape[0]
    check_min_size(n, 2)

    aggregate_graph, height, edge_sampling, cluster_weight, _, _ = _instanciate_vars(
        adjacency, weights)

    for t in range(n - 1):
        i = int(dendrogram[t][0])
        j = int(dendrogram[t][1])
        if i >= n and height[i - n] == dendrogram[t][2]:
            edge_sampling[t] = edge_sampling[i - n]
            edge_sampling[i - n] = 0
        elif j >= n and height[j - n] == dendrogram[t][2]:
            edge_sampling[t] = edge_sampling[j - n]
            edge_sampling[j - n] = 0
        height[t] = dendrogram[t][2]
        if j in aggregate_graph.neighbors[i]:
            edge_sampling[t] += aggregate_graph.neighbors[i][j]
        cluster_weight[t] = aggregate_graph.cluster_out_weights[i] + aggregate_graph.cluster_out_weights[j] \
            + aggregate_graph.cluster_in_weights[i] + aggregate_graph.cluster_in_weights[j]
        aggregate_graph.merge(i, j)

    cost: float = edge_sampling.dot(cluster_weight) / 2

    if not normalized:
        if weights == 'degree':
            cost *= adjacency.data.sum()
        else:
            cost *= n

    return cost

Beispiel #17

0

Datei anzeigen

Datei: metrics.py Projekt: vintasoftware/scikit-network

def tree_sampling_divergence(adjacency: sparse.csr_matrix,
                             dendrogram: np.ndarray,
                             weights: str = 'degree',
                             normalized: bool = True) -> float:
    """Tree sampling divergence of a hierarchy (quality metric).

    * Graphs
    * Digraphs

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` (default) or ``'uniform'``.
    normalized :
        If ``True``, normalized score (between 0 and 1).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import tree_sampling_divergence, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = tree_sampling_divergence(adjacency, dendrogram)
    >>> np.round(score, 2)
    0.52

    References
    ----------
    Charpentier, B. & Bonald, T. (2019).
    `Tree Sampling Divergence: An Information-Theoretic Metric for
    Hierarchical Graph Clustering.
    <https://hal.telecom-paristech.fr/hal-02144394/document>`_
    Proceedings of IJCAI.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    check_min_nnz(adjacency.nnz, 1)
    adjacency = adjacency.astype(float)
    n = adjacency.shape[0]
    check_min_size(n, 2)

    adjacency.data /= adjacency.data.sum()

    aggregate_graph, height, cluster_weight, edge_sampling, weights_row, weights_col = _instanciate_vars(
        adjacency, weights)
    node_sampling = np.zeros(n - 1)

    for t in range(n - 1):
        i = int(dendrogram[t][0])
        j = int(dendrogram[t][1])
        if i >= n and height[i - n] == dendrogram[t][2]:
            edge_sampling[t] = edge_sampling[i - n]
            edge_sampling[i - n] = 0
            node_sampling[t] = node_sampling[i - n]
        elif j >= n and height[j - n] == dendrogram[t][2]:
            edge_sampling[t] = edge_sampling[j - n]
            edge_sampling[j - n] = 0
            node_sampling[t] = node_sampling[j - n]
        if j in aggregate_graph.neighbors[i]:
            edge_sampling[t] += aggregate_graph.neighbors[i][j]
        node_sampling[t] += aggregate_graph.cluster_out_weights[i] * aggregate_graph.cluster_in_weights[j] + \
            aggregate_graph.cluster_out_weights[j] * aggregate_graph.cluster_in_weights[i]
        height[t] = dendrogram[t][2]
        aggregate_graph.merge(i, j)

    index = np.where(edge_sampling)[0]
    score = edge_sampling[index].dot(
        np.log(edge_sampling[index] / node_sampling[index]))
    if normalized:
        inv_out_weights = sparse.diags(weights_row, shape=(n, n), format='csr')
        inv_out_weights.data = 1 / inv_out_weights.data
        inv_in_weights = sparse.diags(weights_col, shape=(n, n), format='csr')
        inv_in_weights.data = 1 / inv_in_weights.data
        sampling_ratio = inv_out_weights.dot(adjacency.dot(inv_in_weights))
        inv_out_weights.data = np.ones(len(inv_out_weights.data))
        inv_in_weights.data = np.ones(len(inv_in_weights.data))
        edge_sampling = inv_out_weights.dot(adjacency.dot(inv_in_weights))
        mutual_information = edge_sampling.data.dot(np.log(
            sampling_ratio.data))
        score /= mutual_information
    return score

Beispiel #18

0

Datei anzeigen

Datei: randomized_methods.py Projekt: vintasoftware/scikit-network

def randomized_eig(matrix,
                   n_components: int,
                   which='LM',
                   n_oversamples: int = 10,
                   n_iter='auto',
                   power_iteration_normalizer: Union[str, None] = 'auto',
                   random_state=None,
                   one_pass: bool = False):
    """Truncated randomized eigenvalue decomposition.

    Parameters
    ----------
    matrix: ndarray or sparse matrix
        Matrix to decompose
    n_components: int
        Number of singular values and vectors to extract.
    which: str
        which eigenvalues to compute. ``'LM'`` for Largest Magnitude and ``'SM'`` for Smallest Magnitude.
        Any other entry will result in Largest Magnitude.
    n_oversamples : int (default=10)
        Additional number of random vectors to sample the range of ``matrix`` so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of ``matrix`` is ``n_components + n_oversamples``. Smaller number can improve speed
        but can negatively impact the quality of approximation of singular vectors and singular values.
    n_iter: int or 'auto' (default is 'auto')
        See :meth:`randomized_range_finder`
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`
    random_state: int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`
    one_pass: bool (default=False)
        whether to use algorithm 5.6 instead of 5.3. 5.6 requires less access to the original matrix,
        while 5.3 is more accurate.

    Returns
    -------
    eigenvalues: np.ndarray
    eigenvectors: np.ndarray

    References
    ----------
    Finding structure with randomness: Stochastic algorithms for constructing
    approximate matrix decompositions
    Halko, et al., 2009
    http://arxiv.org/abs/arXiv:0909.4061
    """
    check_square(adjacency=matrix)
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    shift_value: float = 0.  # upper bound on spectral radius

    if which == 'SM':
        try:
            shift_value = (abs(matrix).dot(np.ones(matrix.shape[1]))).max()
        except TypeError:
            shift_value: float = 1.1 * randomized_eig(matrix,
                                                      n_components=1)[0][0]

        matrix *= -1
        if isinstance(matrix, SparseLR):
            matrix += shift_value * sparse.identity(matrix.shape[0],
                                                    format='csr')
        else:
            matrix += shift_value * sparse.identity(matrix.shape[0])

    if n_iter == 'auto':
        # Checks if the number of iterations is explicitly specified
        # Adjust n_iter. 7 was found a good compromise for PCA.
        n_iter = 7 if n_components < .1 * min(matrix.shape) else 4

    range_matrix, random_matrix, random_proj = randomized_range_finder(
        matrix, n_random, n_iter, power_iteration_normalizer, random_state,
        True)
    if one_pass:
        approx_matrix = np.linalg.lstsq(random_matrix.T.dot(range_matrix),
                                        random_proj.T.dot(range_matrix),
                                        None)[0].T
    else:
        approx_matrix = (matrix.dot(range_matrix)).T.dot(range_matrix)

    eigenvalues, eigenvectors = np.linalg.eig(approx_matrix)

    del approx_matrix
    # eigenvalues indices in decreasing order
    values_order = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[values_order]
    eigenvectors = np.dot(range_matrix, eigenvectors)[:, values_order]

    if which == 'SM':
        eigenvalues = shift_value - eigenvalues

    return eigenvalues[:n_components], eigenvectors[:, :n_components]

Beispiel #19

0

Datei anzeigen

Datei: metrics.py Projekt: zhongkailv/scikit-network

def modularity(adjacency: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray,
               weights: Union[str, np.ndarray] = 'degree', weights_in: Union[str, np.ndarray] = 'degree',
               resolution: float = 1, return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering (node partition).

    * Graphs
    * Digraphs

    The modularity of a clustering is

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w_iw_j}{w^2}\\right)\\delta_{c_i,c_j}`
    for graphs,

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)\\delta_{c_i,c_j}`
    for digraphs,

    where

    * :math:`c_i` is the cluster of node :math:`i`,\n
    * :math:`w_i` is the weight of node :math:`i`,\n
    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.
    labels:
        Labels of nodes, vector of size :math:`n` .
    weights :
        Weights of nodes.
        ``'degree'`` (default), ``'uniform'`` or custom weights.
    weights_in :
        In-weights of nodes.
        ``None`` (default), ``'degree'``, ``'uniform'`` or custom weights.
        If ``None``, taken equal to weights.
    resolution:
        Resolution parameter (default = 1).
    return_all:
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit: float, optional
    diversity: float, optional

    Example
    -------
    >>> from sknetwork.clustering import modularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> np.round(modularity(adjacency, labels), 2)
    0.11
    """
    adjacency = check_format(adjacency).astype(float)
    check_square(adjacency)

    if len(labels) != adjacency.shape[0]:
        raise ValueError('Dimension mismatch between labels and adjacency matrix.')

    probs_row = check_probs(weights, adjacency)
    probs_col = check_probs(weights_in, adjacency.T)
    membership = membership_matrix(labels)

    fit: float = membership.multiply(adjacency.dot(membership)).data.sum() / adjacency.data.sum()
    div: float = membership.T.dot(probs_col).dot(membership.T.dot(probs_row))
    mod: float = fit - resolution * div
    if return_all:
        return mod, fit, div
    else:
        return mod

Beispiel #20

0

Datei anzeigen

Datei: spring.py Projekt: vintasoftware/scikit-network

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], position_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'Spring':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        position_init : np.ndarray
            Custom initial positions of the nodes. Shape must be (n, 2).
            If ``None``, use the value of self.pos_init.
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`Spring`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        position = np.zeros((n, self.n_components))
        if position_init is None:
            if self.position_init == 'random':
                position = np.random.randn(n, self.n_components)
            elif self.position_init == 'spectral':
                position = Spectral(n_components=self.n_components, normalized=False).fit_transform(adjacency)
        elif isinstance(position_init, np.ndarray):
            if position_init.shape == (n, self.n_components):
                position = position_init.copy()
            else:
                raise ValueError('Initial position has invalid shape.')
        else:
            raise TypeError('Initial position must be a numpy array.')

        if n_iter is None:
            n_iter = self.n_iter

        if self.strength is None:
            strength = np.sqrt((1 / n))
        else:
            strength = self.strength

        pos_max = position.max(axis=0)
        pos_min = position.min(axis=0)
        step_max: float = 0.1 * (pos_max - pos_min).max()
        step: float = step_max / (n_iter + 1)
        tree = None

        delta = np.zeros((n, self.n_components))
        for iteration in range(n_iter):
            delta *= 0
            if self.approx_radius > 0:
                tree = cKDTree(position)

            for i in range(n):
                # attraction
                indices = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i+1]]
                attraction = adjacency.data[adjacency.indptr[i]:adjacency.indptr[i+1]] / strength

                grad = position[i] - position[indices]
                attraction *= np.linalg.norm(grad, axis=1)
                attraction = (grad * attraction[:, np.newaxis]).sum(axis=0)

                # repulsion
                if tree is None:
                    grad: np.ndarray = (position[i] - position)  # shape (n, n_components)
                    distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n,)
                else:
                    neighbors = tree.query_ball_point(position[i], self.approx_radius)
                    grad: np.ndarray = (position[i] - position[neighbors])  # shape (n_neigh, n_components)
                    distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n_neigh,)

                distance = np.where(distance < 0.01, 0.01, distance)
                repulsion = (grad * (strength / distance)[:, np.newaxis] ** 2).sum(axis=0)

                # total force
                delta[i]: np.ndarray = repulsion - attraction

            length = np.linalg.norm(delta, axis=0)
            length = np.where(length < 0.01, 0.1, length)
            delta = delta * step_max / length
            position += delta
            step_max -= step
            err: float = np.linalg.norm(delta) / n
            if err < self.tol:
                break

        self.embedding_ = position
        return self

Beispiel #21

0

Datei anzeigen

Datei: metrics.py Projekt: sknetwork-team/scikit-network

def tree_sampling_divergence(adjacency: sparse.csr_matrix,
                             dendrogram: np.ndarray,
                             weights: str = 'degree',
                             normalized: bool = True) -> float:
    """Tree sampling divergence of a hierarchy (quality metric).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` (default) or ``'uniform'``.
    normalized :
        If ``True``, normalized score (between 0 and 1).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import tree_sampling_divergence, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = tree_sampling_divergence(adjacency, dendrogram)
    >>> np.round(score, 2)
    0.05

    References
    ----------
    Charpentier, B. & Bonald, T. (2019).
    `Tree Sampling Divergence: An Information-Theoretic Metric for
    Hierarchical Graph Clustering.
    <https://hal.telecom-paristech.fr/hal-02144394/document>`_
    Proceedings of IJCAI.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    check_min_nnz(adjacency.nnz, 1)
    adjacency = adjacency.astype(float)
    n = adjacency.shape[0]
    check_min_size(n, 2)

    adjacency.data /= adjacency.data.sum()
    edge_sampling, node_sampling, _ = get_sampling_distributions(
        adjacency, dendrogram, weights)

    index = np.where(edge_sampling)[0]
    score = edge_sampling[index].dot(
        np.log(edge_sampling[index] / node_sampling[index]))
    if normalized:
        weights_row = get_probs(weights, adjacency)
        weights_col = get_probs(weights, adjacency.T)
        inv_out_weights = sparse.diags(weights_row, shape=(n, n), format='csr')
        inv_out_weights.data = 1 / inv_out_weights.data
        inv_in_weights = sparse.diags(weights_col, shape=(n, n), format='csr')
        inv_in_weights.data = 1 / inv_in_weights.data
        sampling_ratio = inv_out_weights.dot(adjacency.dot(inv_in_weights))
        inv_out_weights.data = np.ones(len(inv_out_weights.data))
        inv_in_weights.data = np.ones(len(inv_in_weights.data))
        edge_sampling = inv_out_weights.dot(adjacency.dot(inv_in_weights))
        mutual_information = edge_sampling.data.dot(np.log(
            sampling_ratio.data))
        if mutual_information > 0:
            score /= mutual_information
    return score

Beispiel #22

0

Datei anzeigen

Datei: force_atlas.py Projekt: sknetwork-team/scikit-network

    def fit(self,
            adjacency: Union[sparse.csr_matrix, np.ndarray],
            pos_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'ForceAtlas':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        pos_init :
            Position to start with. Random if not provided.
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`ForceAtlas`
        """
        # verify the format of the adjacency matrix
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        # setting of the tolerance according to the size of the graph
        if n < 5000:
            tolerance = 0.1
        elif 5000 <= n < 50000:  # pragma: no cover
            tolerance = 1
        else:  # pragma: no cover
            tolerance = 10

        if n_iter is None:
            n_iter = self.n_iter

        # initial position of the nodes of the graph
        if pos_init is None:
            position: np.ndarray = np.random.randn(n, self.n_components)
        else:
            if pos_init.shape != (n, self.n_components):
                raise ValueError(
                    'The initial position does not have valid dimensions.')
            else:
                position = pos_init
        # compute the vector with the degree of each node
        degree: np.ndarray = adjacency.dot(np.ones(adjacency.shape[1])) + 1

        # initialization of variation of position of nodes
        resultants = np.zeros(n)
        delta: np.ndarray = np.zeros((n, self.n_components))
        swing_vector: np.ndarray = np.zeros(n)
        global_speed = 1

        for iteration in range(n_iter):
            delta *= 0
            global_swing = 0
            global_traction = 0

            if self.approx_radius > 0:
                tree = cKDTree(position)
            else:
                tree = None

            for i in range(n):

                # attraction
                indices = adjacency.indices[adjacency.indptr[i]:adjacency.
                                            indptr[i + 1]]
                attraction = position[i] - position[indices]

                if self.lin_log:
                    attraction = np.sign(attraction) * np.log(
                        1 + np.abs(10 * attraction))
                attraction = attraction.sum(axis=0)

                # repulsion
                if tree is None:
                    neighbors = np.arange(n)
                else:
                    neighbors = tree.query_ball_point(position[i],
                                                      self.approx_radius)

                grad: np.ndarray = (position[i] - position[neighbors]
                                    )  # shape (n_neigh, n_components)
                distance: np.ndarray = np.linalg.norm(
                    grad, axis=1)  # shape (n_neigh,)
                distance = np.where(distance < 0.01, 0.01, distance)
                repulsion = grad * (degree[neighbors] / distance)[:,
                                                                  np.newaxis]

                repulsion *= self.repulsive_factor * degree[i]
                repulsion = repulsion.sum(axis=0)

                # gravity
                gravity = self.gravity_factor * degree[i] * grad
                gravity = gravity.sum(axis=0)

                # forces resultant applied on node i for traction, swing and speed computation
                force = repulsion - attraction - gravity
                resultant_new: float = np.linalg.norm(force)
                resultant_old: float = resultants[i]

                swing_node: float = np.abs(
                    resultant_new -
                    resultant_old)  # force variation applied on node i
                swing_vector[i] = swing_node
                global_swing += (degree[i] + 1) * swing_node

                traction: float = np.abs(
                    resultant_new +
                    resultant_old) / 2  # traction force applied on node i
                global_traction += (degree[i] + 1) * traction

                node_speed = self.speed * global_speed / (
                    1 + global_speed * np.sqrt(swing_node))
                if node_speed > self.speed_max / resultant_new:  # pragma: no cover
                    node_speed = self.speed_max / resultant_new

                delta[i]: np.ndarray = node_speed * force
                resultants[i] = resultant_new
                global_speed = tolerance * global_traction / global_swing

            position += delta  # calculating displacement and final position of points after iteration
            if (swing_vector < 1).all():
                break  # if the swing of all nodes is zero, then convergence is reached and we break.

        self.embedding_ = position
        return self

Beispiel #23

0

Datei anzeigen

Datei: metrics.py Projekt: vintasoftware/scikit-network

def cosine_modularity(adjacency, embedding: np.ndarray, embedding_col=None, resolution=1., weights='degree',
                      return_all: bool = False):
    """Quality metric of an embedding :math:`x` defined by:

    :math:`Q = \\sum_{ij}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, x_j)}{2}\\right)`

    where

    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight of the graph.

    For bipartite graphs with column embedding :math:`y`, the metric is

    :math:`Q = \\sum_{ij}\\left(\\dfrac{B_{ij}}{w} - \\gamma \\dfrac{w_{1,i}w_{2,j}}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, y_j)}{2}\\right)`

    where

    * :math:`w_{1,i}, w_{2,j}` are the weights of nodes :math:`i` (row) and :math:`j` (column),\n
    * :math:`w = 1^TB1` is the total weight of the graph.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    embedding :
        Embedding of the nodes.
    embedding_col :
        Embedding of the columns (for bipartite graphs).
    resolution :
        Resolution parameter.
    weights : ``'degree'`` or ``'uniform'``
        Weights of the nodes.
    return_all :
        If ``True``, also return fit and diversity

    Returns
    -------
    modularity : float
    fit: float, optional
    diversity: float, optional

    Example
    -------
    >>> from sknetwork.embedding import cosine_modularity
    >>> from sknetwork.data import karate_club
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> embedding = graph.position
    >>> np.round(cosine_modularity(adjacency, embedding), 2)
    0.35
    """
    adjacency = check_format(adjacency)
    total_weight: float = adjacency.data.sum()

    if embedding_col is None:
        check_square(adjacency)
        embedding_col = embedding.copy()

    embedding_row_norm = normalize(embedding, p=2)
    embedding_col_norm = normalize(embedding_col, p=2)

    probs_row = check_probs(weights, adjacency)
    probs_col = check_probs(weights, adjacency.T)

    if isinstance(embedding_row_norm, sparse.csr_matrix) and isinstance(embedding_col_norm, sparse.csr_matrix):
        fit: float = 0.5 * (1 + (embedding_row_norm.multiply(adjacency.dot(embedding_col_norm))).sum() / total_weight)
    else:
        fit: float = 0.5 * (
            1 + (np.multiply(embedding_row_norm, adjacency.dot(embedding_col_norm))).sum() / total_weight)
    div: float = 0.5 * (1 + (embedding.T.dot(probs_row)).dot(embedding_col.T.dot(probs_col)))

    if return_all:
        return fit, div, fit - resolution * div
    else:
        return fit - resolution * div

Beispiel #24

0

Datei anzeigen

    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Spectral':
        """Compute the graph embedding.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`Spectral`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n = adjacency.shape[0]

        solver = set_solver(self.solver, adjacency)
        n_components = 1 + check_n_components(self.n_components, n - 2)

        regularize: bool = not (self.regularization is None
                                or self.regularization == 0.)
        check_scaling(self.scaling, adjacency, regularize)

        weights = adjacency.dot(np.ones(n))
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * weights.sum() / n**2
            weights += regularization * n

        # Spectral decomposition of the normalized adjacency matrix
        weights_inv_sqrt_diag = diag_pinv(np.sqrt(weights))

        if regularization:
            norm_adjacency = NormalizedAdjacencyOperator(
                adjacency, regularization)
        else:
            norm_adjacency = weights_inv_sqrt_diag.dot(
                adjacency.dot(weights_inv_sqrt_diag))

        solver.which = 'LA'
        solver.fit(matrix=norm_adjacency, n_components=n_components)
        eigenvalues = solver.eigenvalues_
        index = np.argsort(-eigenvalues)[1:]  # skip first eigenvalue
        eigenvalues = eigenvalues[index]
        eigenvectors = weights_inv_sqrt_diag.dot(solver.eigenvectors_[:,
                                                                      index])

        embedding = eigenvectors.copy()

        if self.scaling:
            eigenvalues_inv_diag = diag_pinv((1 - eigenvalues)**self.scaling)
            embedding = eigenvalues_inv_diag.dot(embedding.T).T

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self

Beispiel #25

0

Datei anzeigen

    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Spectral':
        """Compute the graph embedding.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`Spectral`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n = adjacency.shape[0]

        if self.solver == 'auto':
            solver = auto_solver(adjacency.nnz)
            if solver == 'lanczos':
                self.solver: EigSolver = LanczosEig()
            else:  # pragma: no cover
                self.solver: EigSolver = HalkoEig()

        n_components = check_n_components(self.n_components, n - 2)
        n_components += 1

        if self.equalize and (self.regularization is None
                              or self.regularization
                              == 0.) and not is_connected(adjacency):
            raise ValueError(
                "The option 'equalize' is valid only if the graph is connected or with regularization."
                "Call 'fit' either with 'equalize' = False or positive 'regularization'."
            )

        weights = adjacency.dot(np.ones(n))
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * weights.sum() / n**2
            weights += regularization * n

        if self.normalized_laplacian:
            # Finding the largest eigenvalues of the normalized adjacency is easier for the solver than finding the
            # smallest eigenvalues of the normalized laplacian.
            weights_inv_sqrt_diag = diag_pinv(np.sqrt(weights))

            if regularization:
                norm_adjacency = NormalizedAdjacencyOperator(
                    adjacency, regularization)
            else:
                norm_adjacency = weights_inv_sqrt_diag.dot(
                    adjacency.dot(weights_inv_sqrt_diag))

            self.solver.which = 'LA'
            self.solver.fit(matrix=norm_adjacency, n_components=n_components)
            eigenvalues = 1 - self.solver.eigenvalues_
            # eigenvalues of the Laplacian in increasing order
            index = np.argsort(eigenvalues)[1:]
            # skip first eigenvalue
            eigenvalues = eigenvalues[index]
            # eigenvectors of the Laplacian, skip first eigenvector
            eigenvectors = np.array(
                weights_inv_sqrt_diag.dot(self.solver.eigenvectors_[:, index]))

        else:
            if regularization:
                laplacian = LaplacianOperator(adjacency, regularization)
            else:
                weight_diag = sparse.diags(weights, format='csr')
                laplacian = weight_diag - adjacency

            self.solver.which = 'SM'
            self.solver.fit(matrix=laplacian, n_components=n_components)
            eigenvalues = self.solver.eigenvalues_[1:]
            eigenvectors = self.solver.eigenvectors_[:, 1:]

        embedding = eigenvectors.copy()

        if self.equalize:
            eigenvalues_sqrt_inv_diag = diag_pinv(np.sqrt(eigenvalues))
            embedding = eigenvalues_sqrt_inv_diag.dot(embedding.T).T

        if self.barycenter:
            eigenvalues_diag = sparse.diags(eigenvalues)
            subtract = eigenvalues_diag.dot(embedding.T).T
            if not self.normalized_laplacian:
                weights_inv_diag = diag_pinv(weights)
                subtract = weights_inv_diag.dot(subtract)
            embedding -= subtract

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self