예제 #1
0
def get_embedding(input_matrix: Union[sparse.csr_matrix, np.ndarray],
                  method: BaseEmbedding,
                  co_embedding: bool = False) -> Tuple[np.ndarray, bool]:
    """Fit the embedding method on the input matrix and return the embedding.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    method :
        Embedding method. It is fitted in place by this function.
    co_embedding : bool
        If ``True``, co-embedding of rows and columns: the row embedding and the column embedding
        of the fitted method are stacked vertically (rows first).
        Otherwise, the attribute ``embedding_`` of the fitted method is returned.

    Returns
    -------
    embedding : np.ndarray
        Embedding computed by the method.
    bipartite : bool
        ``True`` if the input matrix is not square or if ``co_embedding`` is ``True``.
    """
    bipartite = (not is_square(input_matrix)) or co_embedding
    if not co_embedding:
        method.fit(input_matrix)
        return method.embedding_, bipartite

    try:
        method.fit(input_matrix, force_bipartite=True)
    except TypeError:
        # The method's ``fit`` does not accept ``force_bipartite``: fall back to the plain call.
        # Only TypeError is caught so that interrupts and genuine fitting errors are not
        # silently replaced by a second fit.
        method.fit(input_matrix)
    embedding = np.vstack((method.embedding_row_, method.embedding_col_))
    return embedding, bipartite
예제 #2
0
def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                  force_bipartite: bool = False, force_directed: bool = False)\
        -> Tuple[sparse.csr_matrix, bool]:
    """Validate the input matrix and build the adjacency matrix to work on.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``.
    force_directed :
        If ``True`` return :math:`A  = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.

    Returns
    -------
    adjacency : sparse.csr_matrix
        The checked input matrix itself, or its bipartite expansion.
    bipartite : bool
        ``True`` if the input has been treated as a biadjacency matrix.
    """
    matrix = check_format(input_matrix)

    # Decide whether the matrix must be treated as a biadjacency matrix.
    # The symmetry test is only evaluated for square matrices when directed graphs are not allowed.
    if force_bipartite or not is_square(matrix):
        bipartite = True
    elif allow_directed:
        bipartite = False
    else:
        bipartite = not is_symmetric(matrix)

    if not bipartite:
        return matrix, bipartite

    if force_directed:
        return bipartite2directed(matrix), bipartite
    return bipartite2undirected(matrix), bipartite
예제 #3
0
def save(folder: str, data: Union[sparse.csr_matrix, Bunch]):
    """Write a Bunch or a CSR matrix to a bundle folder made of Numpy and Pickle files, so that
    subsequent loads are faster. Any existing folder with the same name is removed first.

    Parameters
    ----------
    folder : str
        The name to be used for the bundle folder (``~`` is expanded).
    data : Union[sparse.csr_matrix, Bunch]
        The data to save. A CSR matrix is wrapped in a Bunch, under the attribute ``adjacency``
        if it is square and ``biadjacency`` otherwise.

    Example
    -------
    >>> from sknetwork.data import save
    >>> graph = Bunch()
    >>> graph.adjacency = sparse.csr_matrix(np.random.random((10, 10)) < 0.2)
    >>> graph.names = np.array(list('abcdefghij'))
    >>> save('random_data', graph)
    >>> 'random_data' in listdir('.')
    True
    """
    folder = expanduser(folder)

    # Start from a clean bundle: drop any previous content.
    if exists(folder):
        shutil.rmtree(folder)

    if isinstance(data, sparse.csr_matrix):
        wrapped = Bunch()
        attribute = 'adjacency' if is_square(data) else 'biadjacency'
        setattr(wrapped, attribute, data)
        data = wrapped

    # Absolute paths are used as is; relative ones are resolved from the current directory.
    data_home = '' if isabs(folder) else './'
    save_to_numpy_bundle(data, folder, data_home)
예제 #4
0
def largest_connected_component(adjacency: Union[sparse.csr_matrix,
                                                 np.ndarray],
                                return_labels: bool = False):
    """Return the sub-graph induced by the largest connected component of a graph.
    Non-square inputs are handled as biadjacency matrices of undirected bipartite graphs.

    * Graphs
    * Digraphs
    * Bigraphs

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.
    return_labels : bool
        Whether to return the indices of the new nodes in the original graph.

    Returns
    -------
    new_adjacency : sparse.csr_matrix
        Adjacency or biadjacency matrix of the largest connected component.
    indices : array or tuple of array
        Indices of the nodes in the original graph. For biadjacency matrices,
        ``indices[0]`` corresponds to the rows and ``indices[1]`` to the columns.
        Only returned if ``return_labels`` is ``True``.

    """
    adjacency = check_format(adjacency)
    n_row, _ = adjacency.shape
    bipartite: bool = not is_square(adjacency)

    if bipartite:
        # Rows and columns become the nodes of a single graph: rows first, then columns.
        graph = sparse.bmat([[None, adjacency], [adjacency.T, None]],
                            format='csr')
    else:
        graph = adjacency

    labels = connected_components(graph)
    label_values, label_counts = np.unique(labels, return_counts=True)
    largest = label_values[np.argmax(label_counts)]
    members = np.nonzero(labels == largest)[0]

    if bipartite:
        # members is sorted, so the first index >= n_row separates rows from columns.
        cut = np.searchsorted(members, n_row)
        rows = members[:cut]
        cols = members[cut:] - n_row
    else:
        rows = cols = members

    row_slice = adjacency[rows, :]
    new_adjacency = row_slice.tocsc()[:, cols].tocsr()

    if not return_labels:
        return new_adjacency
    if bipartite:
        return new_adjacency, (rows, cols)
    return new_adjacency, rows
    def fit(self,
            input_matrix: sparse.csr_matrix,
            force_bipartite: bool = False):
        """Embedding of graphs from the clustering obtained with Louvain.

        Each row of the normalized input matrix is multiplied by the membership matrix of the
        cluster labels, giving one embedding dimension per cluster.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.
            This flag is passed to the Louvain clustering.

        Returns
        -------
        self: :class:`BiLouvainEmbedding`
        """
        clustering = Louvain(resolution=self.resolution,
                             modularity=self.modularity,
                             tol_optimization=self.tol_optimization,
                             tol_aggregation=self.tol_aggregation,
                             n_aggregations=self.n_aggregations,
                             shuffle_nodes=self.shuffle_nodes,
                             sort_clusters=False,
                             return_membership=True,
                             return_aggregate=True,
                             random_state=self.random_state)
        clustering.fit(input_matrix, force_bipartite=force_bipartite)

        # Square input: a single set of labels. Otherwise the column labels are the primary
        # labels and the row labels the secondary ones.
        if is_square(input_matrix):
            primary, secondary = clustering.labels_, None
        else:
            primary = clustering.labels_col_
            secondary = clustering.labels_row_

        # Re-index the labels according to the isolated_nodes setting.
        self.labels_, labels_row = reindex_labels(primary, secondary,
                                                  self.isolated_nodes)

        # Embedding of the rows.
        row_probs = normalize(input_matrix)
        self.embedding_ = row_probs.dot(membership_matrix(self.labels_)).toarray()

        if labels_row is None:
            return self

        # Embedding of the columns, from the transposed matrix and the re-indexed row labels.
        col_probs = normalize(input_matrix.T)
        col_embedding = col_probs.dot(membership_matrix(labels_row))
        self.embedding_row_ = self.embedding_
        self.embedding_col_ = col_embedding.toarray()

        return self
예제 #6
0
def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
    """Write a Bunch or a CSR matrix to a bundle folder made of Numpy and Pickle files, so that
    subsequent loads are faster. Supported attribute types include sparse matrices, NumPy arrays,
    strings and Bunch. Any existing folder with the same name is removed first.

    Parameters
    ----------
    folder : str or :class:`pathlib.Path`
        The name to be used for the bundle folder (``~`` is expanded).
    data : Union[sparse.csr_matrix, Bunch]
        The data to save. A CSR matrix is wrapped in a Bunch, under the attribute ``adjacency``
        if it is square and ``biadjacency`` otherwise.

    Example
    -------
    >>> from sknetwork.data import save
    >>> graph = Bunch()
    >>> graph.adjacency = sparse.csr_matrix(np.random.random((10, 10)) < 0.2)
    >>> graph.names = np.array(list('abcdefghij'))
    >>> save('random_data', graph)
    >>> 'random_data' in listdir('.')
    True
    """
    target = Path(folder).expanduser()

    # Start from a clean bundle: drop any previous content.
    if target.exists():
        shutil.rmtree(target)

    if isinstance(data, sparse.csr_matrix):
        wrapped = Bunch()
        attribute = 'adjacency' if is_square(data) else 'biadjacency'
        setattr(wrapped, attribute, data)
        data = wrapped

    # Absolute paths are anchored at the root; relative ones at the current directory.
    data_home = '/' if target.is_absolute() else '.'
    save_to_numpy_bundle(data, target, data_home)