コード例 #1
0
ファイル: embeddings.py プロジェクト: alexayala08/PyNets
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="all_nodes",
               n_components=None, prune=0, norm=1):
    r"""
    Compute the adjacency spectral embedding (ASE) of a graph and save it.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on an
    SVD to reduce the dimensionality to the specified k, or if k is
    unspecified, can find a number of dimensions automatically.

    Parameters
    ----------
    mat : ndarray or nx.Graph
        An nxn adjacency matrix or graph object. Only used directly when
        `norm` is below 1 and `prune` is below 1; it is never modified.
    atlas : str
        The name of an atlas (indicating the node definition). Used in the
        log message and in the output file name.
    graph_path : str
        File path of the graph. It is handed to `CleanGraphs` and also
        determines the output location and file name.
    ID : str
        Subject identifier. Not used inside this function.
    subgraph_name : str
        Label used in the log message and in the output file name.
    n_components : int or None
        Forwarded to `AdjacencySpectralEmbed`.
    prune : int
        If `float(prune) >= 1`, the matrix to embed is loaded from the path
        returned as the second element of `CleanGraphs.prune_graph()`.
    norm : int
        If `float(norm) >= 1`, the matrix to embed is taken from
        `CleanGraphs.normalize_graph()` instead of `mat`.

    Returns
    -------
    out_path : str or None
        File path to the .npy file containing the ASE embedding, written to
        an ``embeddings`` directory next to the directory that holds
        `graph_path`. None is returned (and nothing is written) when every
        entry of the cleaned matrix is smaller than 1e-7 in magnitude.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or
    V) are ordered according to the amount of variance they explain in the
    original matrix. By selecting a subset of these basis vectors (through
    our choice of dimensionality reduction) we can find a lower dimensional
    space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
      Consistent Adjacency Spectral Embedding for Stochastic Blockmodel
      Graphs," Journal of the American Statistical Association,
      Vol. 107(499), 2012

    """
    import os
    from pathlib import Path
    import networkx as nx
    import numpy as np
    from graspologic.embed.ase import AdjacencySpectralEmbed
    from pynets.stats.netstats import CleanGraphs

    # Adjacency Spectral embedding
    print(
        f"Embedding unimodal asetome for atlas: {atlas} and "
        f"{subgraph_name}..."
    )
    ase = AdjacencySpectralEmbed(n_components=n_components)
    cg = CleanGraphs(None, None, graph_path, prune, norm)

    if float(norm) >= 1:
        mat_clean = nx.to_numpy_array(cg.normalize_graph())
    elif isinstance(mat, nx.Graph):
        mat_clean = nx.to_numpy_array(mat)
    else:
        # Work on a copy: the clean-up below writes into the array and must
        # not alter the caller's matrix.
        mat_clean = np.array(mat)

    # Pruning is applied after normalization and replaces the matrix with
    # the one saved by CleanGraphs.
    if float(prune) >= 1:
        graph_path_tmp = cg.prune_graph()[1]
        mat_clean = np.load(graph_path_tmp)

    # Zero out NaN and +/-inf entries.
    mat_clean[~np.isfinite(mat_clean)] = 0

    # Nothing to embed if the matrix is (numerically) empty.
    if (np.abs(mat_clean) < 0.0000001).all() or np.isnan(np.sum(mat_clean)):
        return None

    ase_fit = ase.fit_transform(mat_clean)

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    out_path = (
        f"{namer_dir}/gradient-ASE"
        f"_{atlas}_{subgraph_name}_{os.path.basename(graph_path)}"
    )
    # np.save silently appends ".npy" when it is missing; add it here so the
    # returned path always names the file that was actually written.
    if not out_path.endswith(".npy"):
        out_path += ".npy"

    print("Saving...")
    np.save(out_path, ase_fit)

    return out_path
コード例 #2
0
ファイル: embeddings.py プロジェクト: alexayala08/PyNets
def _omni_embed(pop_array, atlas, graph_path_list, ID,
                subgraph_name="all_nodes", n_components=None, norm=1):
    r"""
    Omnibus embedding of arbitrary number of input graphs with matched vertex
    sets.

    Given :math:`A_1, A_2, ..., A_m` a collection of (possibly weighted)
    adjacency matrices of a collection :math:`m` undirected graphs with
    matched vertices.
    Then the :math:`(mn \times mn)` omnibus matrix, :math:`M`, has the
    block structure where :math:`M_{ij} = \frac{1}{2}(A_i + A_j)`. The omnibus
    matrix is then embedded using adjacency spectral embedding, and the
    resulting tensor is passed through classical MDS before being saved.

    Parameters
    ----------
    pop_array : list of nx.Graph or ndarray, or ndarray
        If list of nx.Graph, each Graph must contain same number of nodes.
        If list of ndarray, each array must have shape (n_vertices,
        n_vertices).
        If ndarray, then array must have shape (n_graphs, n_vertices,
        n_vertices).
        Only read when `norm` is below 1; its entries are never modified.
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path_list : list
        List of file paths to graphs in pop_array. The first entry
        determines the output location and file name.
    ID : str
        An arbitrary subject identifier. Not used inside this function.
    subgraph_name : str
        Label used in the log message and in the output file name.
    n_components : int or None
        Forwarded to both `OmnibusEmbed` and `ClassicalMDS`.
    norm : int
        If `float(norm) >= 1`, each matrix is taken from
        `CleanGraphs.normalize_graph()` for the corresponding path instead
        of from `pop_array`.

    Returns
    -------
    out_path : str
        File path to .npy file containing omni embedding tensor. If no
        usable matrix remains, the path of an empty ``*_NULL`` placeholder
        file is returned instead.

    References
    ----------
    .. [1] Levin, K., Athreya, A., Tang, M., Lyzinski, V., & Priebe, C. E.
      (2017, November). A central limit theorem for an omnibus embedding of
      multiple random dot product graphs. In Data Mining Workshops (ICDMW),
      2017 IEEE International Conference on (pp. 964-967). IEEE.
    .. [2] Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K.,
      Helm, H. S., & Vogelstein, J. T. (2019). Graspy: Graph statistics in
      python. Journal of Machine Learning Research.

    """
    import os
    from pathlib import Path
    import networkx as nx
    import numpy as np
    from graspologic.embed.omni import OmnibusEmbed
    from graspologic.embed.mds import ClassicalMDS
    from pynets.stats.netstats import CleanGraphs

    dir_path = str(Path(os.path.dirname(graph_path_list[0])).parent)
    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    normalize = float(norm) >= 1

    clean_mats = []
    for idx, graph_path in enumerate(graph_path_list):
        cg = CleanGraphs(None, None, graph_path, 0, norm)

        if normalize:
            mat_clean = nx.to_numpy_array(cg.normalize_graph())
        else:
            graph = pop_array[idx]
            if isinstance(graph, nx.Graph):
                mat_clean = nx.to_numpy_array(graph)
            else:
                # Copy so the clean-up below never writes into pop_array.
                mat_clean = np.array(graph)

        # Zero out NaN and +/-inf entries.
        mat_clean[~np.isfinite(mat_clean)] = 0
        if not np.isnan(np.sum(mat_clean)):
            clean_mats.append(mat_clean)

    if clean_mats:
        # Omnibus embedding
        print(
            f"Embedding unimodal omnetome for atlas: {atlas} and "
            f"{subgraph_name}..."
        )
        omni = OmnibusEmbed(n_components=n_components, check_lcc=False)
        mds = ClassicalMDS(n_components=n_components)
        omni_fit = omni.fit_transform(clean_mats)

        # Transform omnibus tensor into dissimilarity feature.
        # The graph axis is moved to the end so that the leading axis of the
        # MDS input indexes vertices. A plain reshape to the same shape would
        # reinterpret the flat buffer and mix values from different vertices
        # whenever more than one graph is embedded.
        mds_fit = mds.fit_transform(np.moveaxis(omni_fit, 0, -1))

        out_path = (
            f"{namer_dir}/gradient-OMNI_{atlas}_{subgraph_name}_"
            f"{os.path.basename(graph_path_list[0]).split('_thrtype')[0]}.npy"
        )

        print("Saving...")
        np.save(out_path, mds_fit)
    else:
        # Add a null tmp file to prevent pool from breaking
        out_path = (
            f"{namer_dir}/gradient-OMNI"
            f"_{atlas}_{subgraph_name}_"
            f"{os.path.basename(graph_path_list[0])}_NULL"
        )
        if not os.path.exists(out_path):
            # Path.touch is portable; os.mknod is missing on Windows and
            # may need elevated privileges on some Unix platforms.
            Path(out_path).touch()
    return out_path