コード例 #1
0
    def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        """Configure the estimator from its hyper-parameters.

        ``sort_clusters``, ``return_membership`` and ``return_aggregate`` are forwarded to the parent
        constructor and ``verbose`` to :class:`VerboseMixin`. The remaining arguments are stored on the
        instance: ``resolution`` and ``tol_optimization`` are cast to ``np.float32`` (the latter is stored
        as ``tol``), and ``random_state`` is passed through ``check_random_state``.
        """
        parent_options = dict(sort_clusters=sort_clusters,
                              return_membership=return_membership,
                              return_aggregate=return_aggregate)
        super(Louvain, self).__init__(**parent_options)
        VerboseMixin.__init__(self, verbose)

        # Values converted on the way in.
        self.resolution = np.float32(resolution)
        self.tol = np.float32(tol_optimization)
        self.random_state = check_random_state(random_state)

        # Values stored verbatim.
        self.modularity = modularity
        self.tol_aggregation = tol_aggregation
        self.n_aggregations = n_aggregations
        self.shuffle_nodes = shuffle_nodes
コード例 #2
0
 def __init__(self,
              which='LM',
              n_oversamples: int = 10,
              n_iter='auto',
              power_iteration_normalizer: Union[str, None] = 'auto',
              random_state=None,
              one_pass: bool = False):
     """Store the solver settings.

     ``which`` is forwarded to the parent constructor. Every other argument is kept as an attribute of
     the same name, with ``random_state`` first passed through ``check_random_state``.
     """
     super(HalkoEig, self).__init__(which=which)
     self.random_state = check_random_state(random_state)
     self.one_pass = one_pass
     self.power_iteration_normalizer = power_iteration_normalizer
     self.n_iter = n_iter
     self.n_oversamples = n_oversamples
コード例 #3
0
    def __init__(self,
                 n_components: int = 2,
                 scale: float = .1,
                 resolution: float = 1,
                 tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3,
                 n_aggregations: int = -1,
                 shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None,
                 verbose: bool = False):
        """Store the embedding settings and build the inner :class:`Louvain` instance.

        ``n_components`` and ``scale`` are kept as attributes. All other arguments are handed to the
        :class:`Louvain` object stored as ``_clustering_method``. ``random_state`` is additionally passed
        through ``check_random_state`` and stored on this instance. ``bipartite`` starts as ``None``.
        """
        super(LouvainNE, self).__init__()

        louvain_options = dict(resolution=resolution,
                               tol_optimization=tol_optimization,
                               tol_aggregation=tol_aggregation,
                               n_aggregations=n_aggregations,
                               shuffle_nodes=shuffle_nodes,
                               random_state=random_state,
                               verbose=verbose)
        self._clustering_method = Louvain(**louvain_options)
        self.random_state = check_random_state(random_state)

        self.n_components = n_components
        self.scale = scale
        self.bipartite = None
コード例 #4
0
    def __init__(self,
                 resolution: float = 1,
                 modularity: str = 'dugue',
                 tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3,
                 n_aggregations: int = -1,
                 shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None,
                 isolated_nodes: str = 'remove'):
        """Store the hyper-parameters on the instance.

        ``modularity`` is lower-cased before being stored and ``random_state`` is passed through
        ``check_random_state``; the other arguments are kept unchanged. ``labels_`` starts as ``None``.
        """
        super(LouvainEmbedding, self).__init__()

        # Values normalised on the way in.
        self.modularity = modularity.lower()
        self.random_state = check_random_state(random_state)

        # Values stored verbatim.
        self.resolution = resolution
        self.tol_optimization = tol_optimization
        self.tol_aggregation = tol_aggregation
        self.n_aggregations = n_aggregations
        self.shuffle_nodes = shuffle_nodes
        self.isolated_nodes = isolated_nodes

        self.labels_ = None
コード例 #5
0
def albert_barabasi(n: int = 100, degree: int = 3, undirected: bool = True, seed: Optional[int] = None) \
        -> sparse.csr_matrix:
    """Albert-Barabasi model.

    The graph starts from a clique on the first **degree** nodes. Each following node is linked to
    **degree** distinct existing nodes, sampled with probability proportional to their current degree.

    Parameters
    ----------
    n : int
        Number of nodes.
    degree : int
        Degree of incoming nodes (less than **n**).
    undirected : bool
        If ``True``, return an undirected graph.
    seed :
        Seed of the random generator (optional).

    Returns
    -------
    adjacency : sparse.csr_matrix
        Adjacency matrix.

    Example
    -------
    >>> from sknetwork.data import albert_barabasi
    >>> adjacency = albert_barabasi(30, 3)
    >>> adjacency.shape
    (30, 30)

    References
    ----------
    Albert, R., Barabási, L. (2002). `Statistical mechanics of complex networks
    <https://journals.aps.org/rmp/abstract/10.1103/RevModPhys.74.47>`_
    Reviews of Modern Physics.
    """
    random_state = check_random_state(seed)
    degrees = np.zeros(n, int)
    # Seed graph: a clique on the first `degree` nodes, each with degree `degree - 1`.
    degrees[:degree] = degree - 1
    edges = [(i, j) for i in range(degree) for j in range(i)]
    for i in range(degree, n):
        total_degree = degrees.sum()
        # With degree == 1 the seed graph is a single isolated node, so the total degree is zero
        # and the ratio would be NaN; sample uniformly (p=None) in that case.
        probs = degrees[:i] / total_degree if total_degree > 0 else None
        neighbors = random_state.choice(a=i, p=probs, size=degree, replace=False)
        degrees[neighbors] += 1
        degrees[i] = degree
        edges.extend((i, j) for j in neighbors)
    return edgelist2adjacency(edges, undirected)
コード例 #6
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'RandomProjection':
        """Compute the graph embedding.

        A Gaussian matrix with ``n_components`` columns is orthonormalised by QR. The embedding is the
        sum of this basis and of its ``n_iter`` successive images under ``alpha`` times the propagation
        operator. The operator is ``normalize(adjacency)`` when ``random_walk`` is set, otherwise the
        adjacency itself. When ``normalized`` is set, the result goes through ``normalize(..., p=2)``.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`RandomProjection`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        n_nodes = adjacency.shape[0]

        rng = check_random_state(self.random_state)
        gaussian = rng.normal(size=(n_nodes, self.n_components))

        # Keep only the orthonormal factor of the QR decomposition.
        basis, _ = np.linalg.qr(gaussian)

        operator = normalize(adjacency) if self.random_walk else adjacency

        term = basis
        accumulated = term.copy()
        for _ in range(self.n_iter):
            term = self.alpha * operator.dot(term)
            accumulated += term

        if self.normalized:
            accumulated = normalize(accumulated, p=2)

        self.embedding_ = accumulated

        return self
コード例 #7
0
def randomized_eig(matrix,
                   n_components: int,
                   which='LM',
                   n_oversamples: int = 10,
                   n_iter='auto',
                   power_iteration_normalizer: Union[str, None] = 'auto',
                   random_state=None,
                   one_pass: bool = False):
    """Truncated randomized eigenvalue decomposition.

    The input matrix is never modified: when ``which == 'SM'`` the shifted operator
    ``shift_value * I - matrix`` is built as a new object.

    Parameters
    ----------
    matrix: ndarray or sparse matrix
        Square matrix to decompose.
    n_components: int
        Number of eigenvalues and eigenvectors to extract.
    which: str
        which eigenvalues to compute. ``'LM'`` for Largest Magnitude and ``'SM'`` for Smallest Magnitude.
        Any other entry will result in Largest Magnitude.
    n_oversamples : int (default=10)
        Additional number of random vectors to sample the range of ``matrix`` so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of ``matrix`` is ``n_components + n_oversamples``. Smaller number can improve speed
        but can negatively impact the quality of the approximation.
    n_iter: int or 'auto' (default is 'auto')
        Number of power iterations. ``'auto'`` resolves to 7 when
        ``n_components < .1 * min(matrix.shape)`` and to 4 otherwise.
        See :meth:`randomized_range_finder`
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`
    random_state: int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`
    one_pass: bool (default=False)
        whether to use algorithm 5.6 instead of 5.3. 5.6 requires less access to the original matrix,
        while 5.3 is more accurate.

    Returns
    -------
    eigenvalues: np.ndarray
        The ``n_components`` leading eigenvalues of the projected problem, in decreasing order
        (mapped back through the shift when ``which == 'SM'``).
    eigenvectors: np.ndarray
        Corresponding eigenvectors, one per column.

    References
    ----------
    Finding structure with randomness: Stochastic algorithms for constructing
    approximate matrix decompositions
    Halko, et al., 2009
    http://arxiv.org/abs/arXiv:0909.4061
    """
    check_square(adjacency=matrix)
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    shift_value: float = 0.  # upper bound on spectral radius

    if which == 'SM':
        try:
            # Largest absolute row sum of the matrix.
            shift_value = (abs(matrix).dot(np.ones(matrix.shape[1]))).max()
        except TypeError:
            # abs() is not available for this operand: fall back to 1.1 times the leading
            # eigenvalue returned by a recursive call with default settings.
            shift_value = 1.1 * randomized_eig(matrix, n_components=1)[0][0]

        # Build shift_value * I - matrix out of place. The in-place forms (`*=`, `+=`) would negate
        # the caller's ndarray / scipy sparse matrix as a side effect.
        matrix = matrix * -1
        if isinstance(matrix, SparseLR):
            matrix = matrix + shift_value * sparse.identity(matrix.shape[0],
                                                            format='csr')
        else:
            matrix = matrix + shift_value * sparse.identity(matrix.shape[0])

    if n_iter == 'auto':
        # Checks if the number of iterations is explicitly specified
        # Adjust n_iter. 7 was found a good compromise for PCA.
        n_iter = 7 if n_components < .1 * min(matrix.shape) else 4

    range_matrix, random_matrix, random_proj = randomized_range_finder(
        matrix, n_random, n_iter, power_iteration_normalizer, random_state,
        True)
    if one_pass:
        # Recover the projected matrix from the random sketch by least squares,
        # without another product with `matrix`.
        approx_matrix = np.linalg.lstsq(random_matrix.T.dot(range_matrix),
                                        random_proj.T.dot(range_matrix),
                                        None)[0].T
    else:
        approx_matrix = (matrix.dot(range_matrix)).T.dot(range_matrix)

    eigenvalues, eigenvectors = np.linalg.eig(approx_matrix)

    del approx_matrix
    # eigenvalues indices in decreasing order
    values_order = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[values_order]
    # Lift the eigenvectors of the small problem back to the original space.
    eigenvectors = np.dot(range_matrix, eigenvectors)[:, values_order]

    if which == 'SM':
        # Undo the shift: eigenvalues of (shift * I - matrix) map back to those of matrix.
        eigenvalues = shift_value - eigenvalues

    return eigenvalues[:n_components], eigenvectors[:, :n_components]
コード例 #8
0
def randomized_range_finder(matrix: np.ndarray, size: int, n_iter: Union[int, str],
                            power_iteration_normalizer='auto',
                            random_state=None, return_all: bool = False) \
                            -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Compute an orthonormal matrix :math:`Q`, whose range approximates the range of the input matrix.

    :math:`A \\approx QQ^*A`.

    Parameters
    ----------
    matrix :
        Input matrix
    size :
        Number of random vectors used to sample the range (number of columns of the random matrix).
    n_iter :
        Number of power iterations. It can be used to deal with very noisy
        problems. When 'auto', it is set to 4, unless ``size`` is small
        (< .1 * min(matrix.shape)) in which case ``n_iter`` is set to 7.
        This improves precision with few components.
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``'none'`` or ``None``
            Whether the power iterations are normalized with step-by-step
            QR factorization (the slowest but most accurate), ``None`` / ``'none'``
            (the fastest but numerically unstable when ``n_iter`` is large, e.g.
            typically 5 or larger), or ``'LU'`` factorization (numerically stable
            but can lose slightly in accuracy). The ``'auto'`` mode applies no
            normalization if ``n_iter`` <= 2 and switches to ``'LU'`` otherwise.
            Any other value leaves the sample untouched during the power iterations.
    random_state: int, RandomState instance or ``None``, optional (default= ``None``)
        The seed of the pseudo random number generator to use when shuffling
        the data.  If int, random_state is the seed used by the random number
        generator; If RandomState instance, random_state is the random number
        generator; If ``None``, the random number generator is the RandomState
        instance used by `np.random`.
    return_all : if True, returns (range_matrix, random_matrix, random_proj)
                else returns range_matrix.

    Returns
    -------
    range_matrix : np.ndarray
        Orthonormal factor of an economic QR decomposition, the range of which
        approximates well the range of the input matrix.
    random_matrix : np.ndarray, optional
        projection matrix
    projected_matrix : np.ndarray, optional
        product between the data and the projection matrix

    Notes
    -----
    Follows Algorithm 4.3 of
    `Finding structure with randomness: Stochastic algorithms for constructing approximate matrix decompositions
    <http://arxiv.org/pdf/0909.4061>`_
    Halko, et al., 2009 (arXiv:909)
    """
    random_state = check_random_state(random_state)

    # Resolve the documented 'auto' number of iterations (previously this crashed in range()).
    if isinstance(n_iter, str) and n_iter == 'auto':
        n_iter = 7 if size < .1 * min(matrix.shape) else 4

    # Generating normal random vectors with shape: (A.shape[1], size)
    random_matrix = random_state.normal(size=(matrix.shape[1], size))
    if matrix.dtype.kind == 'f':
        # Ensure f32 is preserved as f32
        random_matrix = random_matrix.astype(matrix.dtype, copy=False)
    range_matrix = random_matrix.copy()

    # The documented ``None`` means "no normalization"; map it to the internal 'none' token,
    # otherwise no branch below matches and the power iterations are silently skipped.
    if power_iteration_normalizer is None:
        power_iteration_normalizer = 'none'

    # Deal with "auto" mode
    if power_iteration_normalizer == 'auto':
        if n_iter <= 2:
            power_iteration_normalizer = 'none'
        else:
            power_iteration_normalizer = 'LU'

    # Perform power iterations with 'range_matrix' to further 'imprint' the top
    # singular vectors of matrix in 'range_matrix'
    for _ in range(n_iter):
        if power_iteration_normalizer == 'none':
            range_matrix = safe_sparse_dot(matrix, range_matrix)
            range_matrix = safe_sparse_dot(matrix.T, range_matrix)
        elif power_iteration_normalizer == 'LU':
            range_matrix, _ = linalg.lu(safe_sparse_dot(matrix, range_matrix),
                                        permute_l=True)
            range_matrix, _ = linalg.lu(safe_sparse_dot(matrix.T, range_matrix),
                                        permute_l=True)
        elif power_iteration_normalizer == 'QR':
            range_matrix, _ = linalg.qr(safe_sparse_dot(matrix, range_matrix),
                                        mode='economic')
            range_matrix, _ = linalg.qr(safe_sparse_dot(matrix.T, range_matrix),
                                        mode='economic')

    # Sample the range of 'matrix' by linear projection of 'range_matrix'
    # Extract an orthonormal basis
    range_matrix, _ = linalg.qr(safe_sparse_dot(matrix, range_matrix),
                                mode='economic')
    if return_all:
        return range_matrix, random_matrix, matrix.dot(random_matrix)
    else:
        return range_matrix
コード例 #9
0
def randomized_svd(matrix,
                   n_components: int,
                   n_oversamples: int = 10,
                   n_iter='auto',
                   transpose='auto',
                   power_iteration_normalizer: Union[str, None] = 'auto',
                   flip_sign: bool = True,
                   random_state=None):
    """Truncated randomized SVD

    Parameters
    ----------
    matrix : ndarray or sparse matrix
        Matrix to decompose
    n_components : int
        Number of singular values and vectors to extract.
    n_oversamples : int (default=10)
        Extra random vectors used to sample the range of the matrix; the range finder works with
        ``n_components + n_oversamples`` vectors in total. Fewer vectors are faster but can degrade
        the approximation of singular vectors and singular values.
    n_iter : int or 'auto' (default is 'auto')
        Number of power iterations; ``'auto'`` resolves to 7 when
        ``n_components < .1 * min(matrix.shape)`` and to 4 otherwise.
        See :meth:`randomized_range_finder`
    power_iteration_normalizer : ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`
    transpose : True, False or 'auto' (default)
        Whether the algorithm should be applied to ``matrix.T`` instead of ``matrix``. The
        result should approximately be the same. The 'auto' mode will
        trigger the transposition if ``matrix.shape[1] > matrix.shape[0]`` since this
        implementation of randomized SVD tends to be a little faster in that case.
    flip_sign : boolean, (default=True)
        The output of a singular value decomposition is only unique up to a
        permutation of the signs of the singular vectors. If `flip_sign` is
        set to `True`, the sign ambiguity is resolved by making the largest
        loadings for each component in the left singular vectors positive.
    random_state : int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`

    Returns
    -------
    left_singular_vectors: np.ndarray
    singular_values: np.ndarray
    right_singular_vectors: np.ndarray

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated
    singular value decomposition using randomization to speed up the
    computations. It is particularly fast on large matrices on which
    you wish to extract only a small number of components. In order to
    obtain further speed up, ``n_iter`` can be set <=2 (at the cost of
    loss of precision).

    References
    ----------
    * Finding structure with randomness: Stochastic algorithms for constructing
      approximate matrix decompositions
      Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061
      (algorithm 5.1)
    * A randomized algorithm for the decomposition of matrices
      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
    * An implementation of a randomized algorithm for principal component
      analysis
      A. Szlam et al. 2014
    """
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    n_row, n_col = matrix.shape

    if n_iter == 'auto':
        # No explicit iteration count: use 7 for few components, 4 otherwise.
        if n_components < .1 * min(matrix.shape):
            n_iter = 7
        else:
            n_iter = 4

    if transpose == 'auto':
        transpose = n_row < n_col
    if transpose:
        # Work on the transpose: the implementation is a bit faster with a smaller shape[1].
        matrix = matrix.T

    range_matrix: np.ndarray = randomized_range_finder(
        matrix, n_random, n_iter, power_iteration_normalizer, random_state)

    # Project the matrix onto the (k + p)-dimensional basis, then take the SVD of that thin matrix.
    projected = safe_sparse_dot(range_matrix.T, matrix)
    uhat, singular_values, v = linalg.svd(projected, full_matrices=False)
    del projected

    u = np.dot(range_matrix, uhat)

    if flip_sign:
        if transpose:
            # The roles of u and v are swapped on output, so base the sign decision on v.
            u, v = svd_flip(u, v, u_based_decision=False)
        else:
            u, v = svd_flip(u, v)

    left = u[:, :n_components]
    values = singular_values[:n_components]
    right = v[:n_components, :]

    if transpose:
        # Transpose the factors back to match the orientation of the input.
        return right.T, values, left.T
    return left, values, right
コード例 #10
0
def watts_strogatz(n: int = 100,
                   degree: int = 6,
                   prob: float = 0.05,
                   seed: Optional[int] = None,
                   metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
    """Watts-Strogatz model.

    Starts from a ring where each node is linked to its ``degree // 2`` successors, then considers
    every edge of every node in turn and, with probability ``prob``, replaces it by an edge to a node
    that is not currently a neighbor.

    Parameters
    ----------
    n :
        Number of nodes.
    degree :
        Initial degree of nodes.
    prob :
        Probability of edge modification.
    seed :
        Seed of the random generator (optional).
    metadata :
        If ``True``, return a `Bunch` object with metadata.

    Returns
    -------
    adjacency or graph : Union[sparse.csr_matrix, Bunch]
        Adjacency matrix or graph with metadata (positions).

    Example
    -------
    >>> from sknetwork.data import watts_strogatz
    >>> adjacency = watts_strogatz(30, 4, 0.02)
    >>> adjacency.shape
    (30, 30)

    References
    ----------
    Watts, D., Strogatz, S. (1998). Collective dynamics of small-world networks, Nature.
    """
    random_state = check_random_state(seed)
    # Ring lattice: node i is linked to its degree // 2 successors (modulo n).
    edges = np.array([(i, (i + j + 1) % n) for i in range(n)
                      for j in range(degree // 2)])
    row, col = edges[:, 0], edges[:, 1]
    adjacency = sparse.coo_matrix((np.ones_like(row, int), (row, col)),
                                  shape=(n, n))
    # Symmetrize; LIL format allows cheap single-entry updates during rewiring.
    adjacency = sparse.lil_matrix(adjacency + adjacency.T)
    nodes = np.arange(n)
    for i in range(n):
        # Snapshot of the neighbors: rewiring below edits adjacency.rows[i] in place,
        # and iterating over a list while it changes skips or revisits entries.
        neighbors = list(adjacency.rows[i])
        candidates = list(set(nodes) - set(neighbors) - {i})
        for j in neighbors:
            if not candidates:
                # Node i has no remaining non-neighbor to rewire to.
                break
            if random_state.random() < prob:
                node = random_state.choice(candidates)
                # A node that just became a neighbor must not be drawn again,
                # otherwise an edge would be removed without a new one being added.
                candidates.remove(node)
                adjacency[i, node] = 1
                adjacency[node, i] = 1
                adjacency[i, j] = 0
                adjacency[j, i] = 0
    adjacency = sparse.csr_matrix(adjacency, shape=adjacency.shape)
    if metadata:
        graph = Bunch()
        graph.adjacency = adjacency
        graph.position = cyclic_position(n)
        return graph
    else:
        return adjacency
コード例 #11
0
def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
                seed: Optional[int] = None, metadata: bool = False) \
                -> Union[sparse.csr_matrix, Bunch]:
    """Stochastic block model.

    Parameters
    ----------
    sizes :
         Block sizes.
    p_in :
        Probability of connection within blocks. Either a single value shared by all blocks
        (any scalar, including an integer such as ``1``) or one value per block.
    p_out :
        Probability of connection across blocks.
    seed :
        Seed of the random generator (optional).
    metadata :
        If ``True``, return a `Bunch` object with metadata.

    Returns
    -------
    adjacency or graph : Union[sparse.csr_matrix, Bunch]
        Adjacency matrix or graph with metadata (labels).

    Example
    -------
    >>> from sknetwork.data import block_model
    >>> sizes = np.array([4, 5])
    >>> adjacency = block_model(sizes)
    >>> adjacency.shape
    (9, 9)

    References
    ----------
    Airoldi, E.,  Blei, D., Feinberg, S., Xing, E. (2007).
    `Mixed membership stochastic blockmodels. <https://arxiv.org/pdf/0705.4485.pdf>`_
    Journal of Machine Learning Research.
    """
    random_state = check_random_state(seed)
    sizes = np.array(sizes)

    if np.ndim(p_in) == 0:
        # Any scalar (float, int, NumPy scalar or 0-d array) is broadcast to one value per block.
        # Testing only for float types left e.g. p_in=1 as a 0-d array that cannot be indexed below.
        p_in = np.full(sizes.shape, p_in, dtype=float)
    else:
        p_in = np.array(p_in)

    # each edge is considered twice
    p_in = p_in / 2

    # Upper block-triangular layout: the lower blocks are left empty (None) and are
    # filled in by the symmetrization step.
    matrix = []
    for i, a in enumerate(sizes):
        row = []
        for j, b in enumerate(sizes):
            if j < i:
                row.append(None)
            elif j > i:
                row.append(sparse.random(a, b, p_out, dtype=bool, random_state=random_state))
            else:
                row.append(sparse.random(a, a, p_in[i], dtype=bool, random_state=random_state))
        matrix.append(row)
    adjacency = sparse.bmat(matrix)
    # Remove self-loops before symmetrizing.
    adjacency.setdiag(0)
    adjacency = directed2undirected(adjacency.tocsr(), weighted=False)

    if metadata:
        graph = Bunch()
        graph.adjacency = adjacency
        # Label of a node = index of its block.
        labels = np.repeat(np.arange(len(sizes)), sizes)
        graph.labels = labels
        return graph
    else:
        return adjacency