Code example #1
File: conftest.py (Project: dPys/PyNets)
    def _gen_mat_data(n: int = 20,
                      m: int = 20,
                      p: float = 0.50,
                      mat_type: str = 'sb',
                      binary: bool = False,
                      asfile: bool = True,
                      n_graphs: int = 1):
        if binary is True:
            wt = 1
        else:
            wt = np.random.uniform

        mat_list = []
        mat_file_list = []
        for nm in range(n_graphs):
            if mat_type == 'er':
                mat = largest_connected_component(
                    symmetrize(
                        remove_loops(
                            er_nm(n,
                                  m,
                                  wt=np.random.uniform,
                                  wtargs=dict(low=0, high=1)))))
            elif mat_type == 'sb':
                if p is None:
                    raise ValueError(
                        f"for mat_type {mat_type}, p cannot be None")
                mat = largest_connected_component(
                    symmetrize(
                        remove_loops(
                            sbm(np.array([n]),
                                np.array([[p]]),
                                wt=wt,
                                wtargs=dict(low=0, high=1)))))
            else:
                raise ValueError(f"mat_type {mat_type} not recognized!")

            mat_list.append(mat)

            if asfile is True:
                mat_path_tmp = tempfile.NamedTemporaryFile(mode='w+',
                                                           suffix='.npy',
                                                           delete=False)
                mat_path = str(mat_path_tmp.name)
                np.save(mat_path, mat)
                mat_file_list.append(mat_path)
                mat_path_tmp.close()

        return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
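Note: the fixture above is mostly a wrapper around a few graspologic calls. A minimal standalone sketch of that core pattern (assuming graspologic is installed; names and parameter values are illustrative):

import numpy as np
from graspologic.simulations import sbm
from graspologic.utils import (largest_connected_component, remove_loops,
                               symmetrize)

# Sample a one-block stochastic block model with uniform edge weights,
# then drop self-loops, symmetrize, and keep the largest component.
mat = sbm(np.array([20]), np.array([[0.5]]),
          wt=np.random.uniform, wtargs=dict(low=0, high=1))
mat = largest_connected_component(symmetrize(remove_loops(mat)))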
Code example #2
File: interfaces.py (Project: dPys/PyNets)
    def __init__(self,
                 est_path,
                 prune,
                 norm,
                 out_fmt="gpickle",
                 remove_self_loops=True):
        import graspologic.utils as gu

        self.est_path = est_path
        self.prune = prune
        self.norm = norm
        self.out_fmt = out_fmt
        self.in_mat = None

        # Load and threshold matrix
        self.in_mat_raw = utils.load_mat(self.est_path)

        # Zero the diagonal, remove NaNs and infs, and ensure edge weights
        # are positive
        self.in_mat = np.array(
            thresholding.autofix(np.abs(self.in_mat_raw)))

        # Remove self-loops and ensure symmetry
        if remove_self_loops is True:
            self.in_mat = gu.remove_loops(gu.symmetrize(self.in_mat))
        else:
            self.in_mat = gu.symmetrize(self.in_mat)

        self.in_mat[np.where(np.isnan(self.in_mat)
                             | np.isinf(self.in_mat))] = 0

        # Create nx graph
        self.G = nx.from_numpy_array(self.in_mat)
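A minimal sketch of the same sanitization steps applied to a raw NumPy matrix, without the PyNets helpers (the simple NaN/inf handling below stands in for thresholding.autofix, which does more):

import networkx as nx
import numpy as np
import graspologic.utils as gu

rng = np.random.default_rng(0)
in_mat = rng.uniform(-1, 1, size=(10, 10))

in_mat = np.abs(in_mat)                          # ensure positive weights
in_mat[~np.isfinite(in_mat)] = 0                 # zero out NaNs and infs
in_mat = gu.remove_loops(gu.symmetrize(in_mat))  # symmetrize, drop self-loops

G = nx.from_numpy_array(in_mat)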
Code example #3
def signal_flow(A):
    """Implementation of the signal flow metric from Varshney et al 2011

    Parameters
    ----------
    A : [type]
        [description]

    Returns
    -------
    [type]
        [description]
    """
    A = A.copy()
    A = remove_loops(A)
    W = (A + A.T) / 2

    D = np.diag(np.sum(W, axis=1))

    L = D - W

    b = np.sum(W * np.sign(A - A.T), axis=1)
    L_pinv = np.linalg.pinv(L)
    z = L_pinv @ b

    return z
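A quick check on a three-node feedforward chain (0 -> 1 -> 2), where the source should score highest; a sketch assuming signal_flow as defined above, with numpy and graspologic's remove_loops in scope:

import numpy as np

A = np.array([[0, 1, 0],
              [0, 0, 1],
              [0, 0, 0]], dtype=float)  # edges 0 -> 1 and 1 -> 2

z = signal_flow(A)
print(z)  # approximately [1, 0, -1]: source highest, sink lowest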
Code example #4
def calculate_p_upper(A):
    """Proportion of total edge weight lying strictly above the diagonal."""
    A = remove_loops(A)
    n = len(A)
    triu_inds = np.triu_indices(n, k=1)  # strictly upper-triangular indices
    upper_triu_sum = A[triu_inds].sum()
    total_sum = A.sum()
    upper_triu_p = upper_triu_sum / total_sum
    return upper_triu_p
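For the same feedforward chain, every edge sits above the diagonal, so the proportion is 1.0 (a sketch under the same assumptions as above):

import numpy as np

A = np.array([[0, 1, 0],
              [0, 0, 1],
              [0, 0, 0]], dtype=float)

print(calculate_p_upper(A))        # 1.0: all weight above the diagonal
print(calculate_p_upper(A + A.T))  # 0.5: symmetric weight splits evenly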
Code example #5
def _augment_graph(
    graph: Union[nx.Graph, nx.OrderedGraph, nx.DiGraph, nx.OrderedDiGraph],
    node_ids: Set[Hashable],
    weight_attribute: Optional[str],
    perform_augment_diagonal: bool = True,
) -> np.ndarray:
    graph_sparse = nx.to_scipy_sparse_matrix(graph,
                                             weight=weight_attribute,
                                             nodelist=node_ids)

    graphs_loops_removed: np.ndarray = remove_loops(graph_sparse)
    graphs_ranked: np.ndarray = pass_to_ranks(graphs_loops_removed)

    if perform_augment_diagonal:
        graphs_diag_augmented: np.ndarray = augment_diagonal(graphs_ranked)
        return graphs_diag_augmented

    return graphs_ranked
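The same three-step preprocessing can be run directly on a dense adjacency matrix; a minimal sketch assuming graspologic is installed:

import numpy as np
from graspologic.utils import augment_diagonal, pass_to_ranks, remove_loops

rng = np.random.default_rng(0)
A = rng.uniform(size=(8, 8))

A = remove_loops(A)      # zero the diagonal
A = pass_to_ranks(A)     # rescale weights by their relative rank
A = augment_diagonal(A)  # put degree-based values back on the diagonal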
Code example #6
File: conftest.py (Project: dPys/PyNets)
    def _gen_mat_data(n: int = 20, m: int = 20, p: float = 0.50,
                      mat_type: str = 'sb', binary: bool = False,
                      asfile: bool = True, n_graphs: int = 1,
                      lcc: bool = False, modality: str = 'func'):
        if binary is True:
            wt = 1
        else:
            wt = np.random.uniform

        mat_list = []
        mat_file_list = []

        if n_graphs > 0:
            for nm in range(n_graphs):
                if mat_type == 'er':
                    mat = symmetrize(
                        remove_loops(er_nm(n, m, wt=np.random.uniform,
                                           wtargs=dict(low=0, high=1))))
                elif mat_type == 'sb':
                    if p is None:
                        raise ValueError(
                            f"for mat_type {mat_type}, p cannot be None")
                    mat = symmetrize(
                        remove_loops(sbm(np.array([n]), np.array([[p]]),
                                         wt=wt, wtargs=dict(low=0,
                                                            high=1))))
                else:
                    raise ValueError(f"mat_type {mat_type} not recognized!")

                if lcc is True:
                    mat = largest_connected_component(mat)

                mat_list.append(autofix(mat))

                if asfile is True:
                    path_tmp = tempfile.NamedTemporaryFile(mode='w+',
                                                           suffix='.npy',
                                                           delete=False)
                    mat_path_tmp = str(path_tmp.name)
                    out_folder = f"{str(Path.home())}/test_mats"
                    os.makedirs(out_folder, exist_ok=True)

                    if modality == 'func':
                        mat_path = f"{out_folder}/graph_sub-999_modality-func_" \
                        f"model-corr_template-" \
                        f"MNI152_2mm_" \
                        f"parc_tol-6fwhm_hpass-" \
                        f"0Hz_" \
                        f"signal-mean_thrtype-prop_thr-" \
                        f"{round(random.uniform(0, 1),2)}.npy"
                    elif modality == 'dwi':
                        mat_path = f"{out_folder}/graph_sub-999_modality-dwi_" \
                        f"model-csa_template-" \
                        f"MNI152_2mm_tracktype-local_" \
                        f"traversal-det_minlength-30_" \
                        f"tol-5_thrtype-prop_thr-" \
                        f"{round(random.uniform(0, 1),2)}.npy"
                    else:
                        raise ValueError(
                            f"modality {modality} not recognized!")

                    shutil.copyfile(mat_path_tmp, mat_path)
                    np.save(mat_path, mat)
                    mat_file_list.append(mat_path)
                    path_tmp.close()

        return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
Code example #7
def motif_matching(
    paths,
    ID,
    atlas,
    namer_dir,
    name_list,
    metadata_list,
    multigraph_list_all,
    graph_path_list_all,
    rsn=None,
):
    import networkx as nx
    import numpy as np
    import glob
    import pickle
    from pynets.core import thresholding
    from pynets.stats.netmotifs import compare_motifs
    from sklearn.metrics.pairwise import cosine_similarity
    from pynets.stats.netstats import community_resolution_selection
    from graspologic.utils import remove_loops, symmetrize
    from pynets.core.nodemaker import get_brainnetome_node_attributes

    [struct_graph_path, func_graph_path] = paths
    struct_mat = np.load(struct_graph_path)
    func_mat = np.load(func_graph_path)

    [struct_coords, struct_labels, struct_label_intensities] = \
        get_brainnetome_node_attributes(glob.glob(
            f"{str(Path(struct_graph_path).parent.parent)}/nodes/*.json"),
        struct_mat.shape[0])

    [func_coords, func_labels, func_label_intensities] = \
        get_brainnetome_node_attributes(glob.glob(
            f"{str(Path(func_graph_path).parent.parent)}/nodes/*.json"),
        func_mat.shape[0])

    # Find intersecting nodes across modalities (i.e. assuming the same
    # parcellation, but accommodating the possibility of dropped nodes)
    diff1 = list(set(struct_label_intensities) - set(func_label_intensities))
    diff2 = list(set(func_label_intensities) - set(struct_label_intensities))
    G_struct = nx.from_numpy_array(struct_mat)
    G_func = nx.from_numpy_array(func_mat)

    bad_idxs = []
    for val in diff1:
        bad_idxs.append(struct_label_intensities.index(val))
        bad_idxs = sorted(list(set(bad_idxs)), reverse=True)
        if type(struct_coords) is np.ndarray:
            struct_coords = list(tuple(x) for x in struct_coords)
    for j in bad_idxs:
        G_struct.remove_node(j)
        print(f"Removing: {(struct_labels[j], struct_coords[j])}...")
        del struct_labels[j], struct_coords[j]

    bad_idxs = []
    for val in diff2:
        bad_idxs.append(func_label_intensities.index(val))
        bad_idxs = sorted(list(set(bad_idxs)), reverse=True)
        if type(func_coords) is np.ndarray:
            func_coords = list(tuple(x) for x in func_coords)
    for j in bad_idxs:
        G_func.remove_node(j)
        print(f"Removing: {(func_labels[j], func_coords[j])}...")
        del func_labels[j], func_coords[j]

    struct_mat = nx.to_numpy_array(G_struct)
    func_mat = nx.to_numpy_array(G_func)

    struct_mat = thresholding.autofix(symmetrize(remove_loops(struct_mat)))

    func_mat = thresholding.autofix(symmetrize(remove_loops(func_mat)))

    if func_mat.shape == struct_mat.shape:
        func_mat[~struct_mat.astype("bool")] = 0
        struct_mat[~func_mat.astype("bool")] = 0
        print(
            "Edge disagreements after matching: ",
            sum(sum(abs(func_mat - struct_mat))),
        )

        metadata = {}
        assert (len(struct_coords) == len(struct_labels) == len(func_coords) ==
                len(func_labels) == func_mat.shape[0])
        metadata["coords"] = struct_coords
        metadata["labels"] = struct_labels
        metadata_list.append(metadata)

        struct_mat = np.maximum(struct_mat, struct_mat.T)
        func_mat = np.maximum(func_mat, func_mat.T)
        struct_mat = thresholding.standardize(struct_mat)
        func_mat = thresholding.standardize(func_mat)

        struct_node_comm_aff_mat = community_resolution_selection(
            nx.from_numpy_array(np.abs(struct_mat)))[1]

        func_node_comm_aff_mat = community_resolution_selection(
            nx.from_numpy_array(np.abs(func_mat)))[1]

        struct_comms = []
        for i in np.unique(struct_node_comm_aff_mat):
            struct_comms.append(struct_node_comm_aff_mat == i)

        func_comms = []
        for i in np.unique(func_node_comm_aff_mat):
            func_comms.append(func_node_comm_aff_mat == i)

        sims = cosine_similarity(struct_comms, func_comms)
        try:
            struct_comm = struct_comms[np.argmax(sims, axis=0)[0]]
        except BaseException:
            print('Matching by structural communities failed...')
            struct_comm = struct_mat
        try:
            func_comm = func_comms[np.argmax(sims, axis=0)[0]]
        except BaseException:
            print('Matching by functional communities failed...')
            func_comm = func_mat

        comm_mask = np.equal.outer(struct_comm, func_comm).astype(bool)

        try:
            assert comm_mask.shape == struct_mat.shape == func_mat.shape
        except AssertionError as e:
            e.args += (comm_mask, comm_mask.shape, struct_mat,
                       struct_mat.shape, func_mat, func_mat.shape)
            raise

        try:
            struct_mat[~comm_mask] = 0
        except BaseException:
            print('Skipping community masking...')
        try:
            func_mat[~comm_mask] = 0
        except BaseException:
            print('Skipping community masking...')

        struct_name = struct_graph_path.split("/rawgraph_")[-1].split(
            ".npy")[0]
        func_name = func_graph_path.split("/rawgraph_")[-1].split(".npy")[0]
        name = f"sub-{ID}_{atlas}_mplx_Layer-1_{struct_name}_" \
               f"Layer-2_{func_name}"
        name_list.append(name)
        struct_mat = np.maximum(struct_mat, struct_mat.T)
        func_mat = np.maximum(func_mat, func_mat.T)
        try:
            [mldict, g_dict] = compare_motifs(struct_mat, func_mat, name,
                                              namer_dir)
        except BaseException:
            print(f"Adaptive thresholding by motif comparisons failed "
                  f"for {name}. This usually happens when no motifs are found")
            return [], [], [], []

        multigraph_list_all.append(list(mldict.values())[0])
        graph_path_list = []
        for thr in list(g_dict.keys()):
            multigraph_path_list_dict = {}
            [struct, func] = g_dict[thr]
            struct_out = f"{namer_dir}/struct_{atlas}_{struct_name}.npy"
            func_out = f"{namer_dir}/struct_{atlas}_{func_name}_" \
                       f"motif-{thr}.npy"
            np.save(struct_out, struct)
            np.save(func_out, func)
            multigraph_path_list_dict[f"struct_{atlas}_{thr}"] = struct_out
            multigraph_path_list_dict[f"func_{atlas}_{thr}"] = func_out
            graph_path_list.append(multigraph_path_list_dict)
        graph_path_list_all.append(graph_path_list)
    else:
        print(
            f"Skipping {rsn} rsn, since structural and functional graphs are "
            f"not identical shapes.")

    return name_list, metadata_list, multigraph_list_all, graph_path_list_all
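The community-masking step above reduces to a single NumPy idiom: np.equal.outer builds a mask that keeps edge (i, j) only when node i's structural membership agrees with node j's functional membership. A minimal sketch with toy membership vectors:

import numpy as np

struct_comm = np.array([True, True, False, False])  # structural memberships
func_comm = np.array([True, False, False, False])   # functional memberships

comm_mask = np.equal.outer(struct_comm, func_comm)

rng = np.random.default_rng(0)
mat = rng.uniform(size=(4, 4))
mat[~comm_mask] = 0  # zero edges whose endpoints disagree across modalities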
Code example #8
def adjacency_spectral_embedding(
    graph: Union[nx.Graph, nx.DiGraph, nx.OrderedGraph, nx.OrderedDiGraph],
    dimensions: int = 100,
    elbow_cut: Optional[int] = None,
    svd_solver_algorithm: SvdAlgorithmType = "randomized",
    svd_solver_iterations: int = 5,
    svd_seed: Optional[int] = None,
    weight_attribute: str = "weight",
) -> Embeddings:
    """
    Given a directed or undirected networkx graph (*not* multigraph), generate an
    Embeddings object.

    Adjacency spectral embeddings are extremely egocentric, implying that results are
    slanted toward the core-periphery of each node. This is in contrast to Laplacian
    spectral embeddings, which look further into the latent space when it captures
    change.

    `Adjacency Spectral Embedding Tutorial
    <https://microsoft.github.io/graspologic/tutorials/embedding/AdjacencySpectralEmbed.html>`_

    Graphs will always have their diagonal augmented. In other words, a self-loop
    will be created for each node with a weight corresponding to the weighted degree.

    Lastly, all weights will be rescaled based on their relative rank in the graph,
    which is beneficial in minimizing anomalous results if some edge weights are
    extremely atypical of the rest of the graph.

    Parameters
    ----------
    graph : Union[nx.Graph, nx.OrderedGraph, nx.DiGraph, nx.OrderedDiGraph]
        An undirected or directed graph. The graph **must**:

        - be fully numerically weighted (every edge must have a real, numeric weight
          or else it will be treated as an unweighted graph)
        - be a basic graph (meaning it should not be a multigraph; if you have a
          multigraph you must first decide how you want to handle the weights of the
          edges between two nodes, whether summed, averaged, last-wins,
          maximum-weight-only, etc)
    dimensions : int (default=100)
        Dimensions to use for the svd solver.
        For undirected graphs, if ``elbow_cut==None``, you will receive an embedding
        that has ``nodes`` rows and ``dimensions`` columns.
        For directed graphs, if ``elbow_cut==None``, you will receive an embedding that
        has ``nodes`` rows and ``2*dimensions`` columns.
        If ``elbow_cut`` is specified to be not ``None``, we will cut the embedding at
        ``elbow_cut`` elbow, but the provided ``dimensions`` will be used in the
        creation of the SVD.
    elbow_cut : Optional[int] (default=None)
        Using a process described by Zhu & Ghodsi in their paper "Automatic
        dimensionality selection from the scree plot via the use of profile likelihood",
        truncate the dimensionality of the return on the ``elbow_cut``-th elbow.
        By default this value is ``None`` but can be used to reduce the dimensionality
        of the returned tensors.
    svd_solver_algorithm : str (default="randomized")
        allowed values: {'randomized', 'full', 'truncated'}

        SVD solver to use:

            - 'randomized'
                Computes randomized svd using
                :func:`sklearn.utils.extmath.randomized_svd`
            - 'full'
                Computes full svd using :func:`scipy.linalg.svd`
                Does not support ``graph`` input of type scipy.sparse.csr_matrix
            - 'truncated'
                Computes truncated svd using :func:`scipy.sparse.linalg.svds`
    svd_solver_iterations : int (default=5)
        Number of iterations for randomized SVD solver. Not used by 'full' or
        'truncated'. The default is larger than the default in randomized_svd
        to handle sparse matrices that may have large slowly decaying spectrum.
    svd_seed : Optional[int] (default=None)
        Used to seed the PRNG used in the ``randomized`` svd solver algorithm.
    weight_attribute : str (default="weight")
        The edge dictionary key that contains the weight of the edge.

    Returns
    -------
    Embeddings

    Raises
    ------
    beartype.roar.BeartypeCallHintParamViolation if parameters do not match type hints
    ValueError if values are not within appropriate ranges or allowed values

    See Also
    --------
    graspologic.pipeline.embed.Embeddings
    graspologic.embed.AdjacencySpectralEmbed
    graspologic.embed.select_svd

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or V) are
    ordered according to the amount of variance they explain in the original matrix.
    By selecting a subset of these basis vectors (through our choice of dimensionality
    reduction) we can find a lower dimensional space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
       Journal of the American Statistical Association, Vol. 107(499), 2012

    .. [2] Levin, K., Roosta-Khorasani, F., Mahoney, M. W., & Priebe, C. E. (2018).
        Out-of-sample extension of graph adjacency spectral embedding. PMLR: Proceedings
        of Machine Learning Research, 80, 2975-2984.

    .. [3] Zhu, M. and Ghodsi, A. (2006). Automatic dimensionality selection from the
        scree plot via the use of profile likelihood. Computational Statistics & Data
        Analysis, 51(2), pp.918-930.

    """
    check_argument(dimensions >= 1, "dimensions must be positive")

    check_argument(elbow_cut is None or elbow_cut >= 1, "elbow_cut must be positive")

    check_argument(
        svd_solver_algorithm in __SVD_SOLVER_TYPES,
        f"svd_solver_algorithm must be one of the values in {','.join(__SVD_SOLVER_TYPES)}",
    )

    check_argument(svd_solver_iterations >= 1, "svd_solver_iterations must be positive")

    check_argument(
        svd_seed is None or 0 <= svd_seed <= 2**32 - 1,
        "svd_seed must be a nonnegative, 32-bit integer",
    )

    check_argument(
        not graph.is_multigraph(),
        "Multigraphs are not supported; you must determine how to represent at most "
        "one edge between any two nodes, and handle the corresponding weights "
        "accordingly",
    )

    used_weight_attribute: Optional[str] = weight_attribute
    if not is_real_weighted(graph, weight_attribute=weight_attribute):
        warnings.warn(
            f"Graphs with edges that do not have a real numeric weight set for every "
            f"{weight_attribute} attribute on every edge are treated as an unweighted "
            f"graph - which presumes all weights are `1.0`. If this is incorrect, "
            f"please add a '{weight_attribute}' attribute to every edge with a real, "
            f"numeric value (e.g. an integer or a float) and call this function again."
        )
        used_weight_attribute = None  # this supersedes what the user said, because
        # not all of the weights are real numbers, if they exist at all
        # this weight=1.0 treatment actually happens in nx.to_scipy_sparse_matrix()

    node_labels = np.array(list(graph.nodes()))

    graph_as_csr = nx.to_scipy_sparse_matrix(
        graph, weight=used_weight_attribute, nodelist=node_labels
    )

    if not is_fully_connected(graph):
        warnings.warn("More than one connected component detected")

    graph_sans_loops = remove_loops(graph_as_csr)

    ranked_graph = pass_to_ranks(graph_sans_loops)

    augmented_graph = augment_diagonal(ranked_graph)

    embedder = AdjacencySpectralEmbed(
        n_components=dimensions,
        n_elbows=None,  # in the short term, we do our own elbow finding
        algorithm=svd_solver_algorithm,
        n_iter=svd_solver_iterations,
        svd_seed=svd_seed,
        concat=False,
        diag_aug=False,
    )
    results = embedder.fit_transform(augmented_graph)
    results_arr: np.ndarray

    if elbow_cut is None:
        if isinstance(results, tuple) or graph.is_directed():
            results_arr = np.concatenate(results, axis=1)
        else:
            results_arr = results
    else:
        column_index = _index_of_elbow(embedder.singular_values_, elbow_cut)
        if isinstance(results, tuple):
            left, right = results
            left = left[:, :column_index]
            right = right[:, :column_index]
            results_arr = np.concatenate((left, right), axis=1)
        else:
            results_arr = results[:, :column_index]

    embeddings = Embeddings(node_labels, results_arr)
    return embeddings
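A minimal usage sketch for the pipeline function above (assuming graspologic is installed; the graph and parameter values are illustrative):

import networkx as nx
import numpy as np
from graspologic.pipeline.embed import adjacency_spectral_embedding

rng = np.random.default_rng(0)
graph = nx.erdos_renyi_graph(50, 0.2, seed=0)
for _, _, d in graph.edges(data=True):
    d["weight"] = rng.uniform()  # every edge needs a real numeric weight

# embeddings pairs each node label with its row of latent positions
embeddings = adjacency_spectral_embedding(graph, dimensions=8, svd_seed=0)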
Code example #9
#%% [markdown]
# ### Load the data

#%%
import numpy as np

from graspologic.utils import binarize, remove_loops
from src.data import load_maggot_graph  # assumed project-local data loader

mg = load_maggot_graph()
mg = mg[mg.nodes["paper_clustered_neurons"]]
# mg = mg[mg.nodes["left"]]
# mg = mg[mg.nodes["class1"] == "KC"]

#%%
import networkx as nx

adj = mg.sum.adj.copy()
adj = remove_loops(adj)
adj = binarize(adj)
g = nx.from_numpy_array(adj, create_using=nx.DiGraph)
nodelist = sorted(g.nodes)
incidence = nx.incidence_matrix(g, nodelist=nodelist, oriented=True).T
weights = np.ones(incidence.shape[0])
from sklearn.linear_model import LinearRegression

lr = LinearRegression(fit_intercept=False, n_jobs=-1)

lr.fit(incidence, weights)

lr_score = lr.coef_
mg.nodes["lr_score"] = lr_score
mg.nodes.sort_values("lr_score", inplace=True)
from src.visualization import adjplot, CLASS_COLOR_DICT
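The regression above ranks nodes so that each edge's target ideally scores one unit above its source: with the oriented incidence matrix B (one row per edge, -1 at the source and +1 at the target), it solves B z ≈ 1 in the least-squares sense, a hierarchy score similar in spirit to signal flow. A quick check on a three-node chain, independent of the maggot data:

import networkx as nx
import numpy as np
from sklearn.linear_model import LinearRegression

g = nx.DiGraph([(0, 1), (1, 2)])  # chain 0 -> 1 -> 2
incidence = nx.incidence_matrix(g, nodelist=sorted(g.nodes), oriented=True).T
weights = np.ones(incidence.shape[0])

lr = LinearRegression(fit_intercept=False)
lr.fit(incidence, weights)
print(lr.coef_)  # approximately [-1, 0, 1]: downstream nodes score higher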
Code example #10
def laplacian_spectral_embedding(
    graph: Union[nx.Graph, nx.OrderedGraph, nx.DiGraph, nx.OrderedDiGraph],
    form: LaplacianFormType = "R-DAD",
    dimensions: int = 100,
    elbow_cut: Optional[int] = None,
    svd_solver_algorithm: SvdAlgorithmType = "randomized",
    svd_solver_iterations: int = 5,
    svd_seed: Optional[int] = None,
    weight_attribute: str = "weight",
    regularizer: Optional[numbers.Real] = None,
) -> Embeddings:
    """
    Given a directed or undirected networkx graph (*not* multigraph), generate an
    Embeddings object.

    The laplacian spectral embedding process is similar to the adjacency spectral
    embedding process, with the key differentiator being that the LSE process looks
    further into the latent space when it captures changes, whereas the ASE process
    is egocentric and focused on immediate differentiators in a node's periphery.

    All weights will be rescaled based on their relative rank in the graph,
    which is beneficial in minimizing anomalous results if some edge weights are
    extremely atypical of the rest of the graph.

    Parameters
    ----------
    graph : Union[nx.Graph, nx.OrderedGraph, nx.DiGraph, nx.OrderedDiGraph]
        An undirected or directed graph. The graph **must**:

        - be fully numerically weighted (every edge must have a real, numeric weight
          or else it will be treated as an unweighted graph)
        - be a basic graph (meaning it should not be a multigraph; if you have a
          multigraph you must first decide how you want to handle the weights of the
          edges between two nodes, whether summed, averaged, last-wins,
          maximum-weight-only, etc)
    form : str (default="R-DAD")
        Specifies the type of Laplacian normalization to use. Allowed values are:
        { "DAD", "I-DAD", "R-DAD" }.  See
        :func:`~graspologic.utils.to_laplacian` for more details regarding form.
    dimensions : int (default=100)
        Dimensions to use for the svd solver.
        For undirected graphs, if ``elbow_cut==None``, you will receive an embedding
        that has ``nodes`` rows and ``dimensions`` columns.
        For directed graphs, if ``elbow_cut==None``, you will receive an embedding that
        has ``nodes`` rows and ``2*dimensions`` columns.
        If ``elbow_cut`` is specified to be not ``None``, we will cut the embedding at
        ``elbow_cut`` elbow, but the provided ``dimensions`` will be used in the
        creation of the SVD.
    elbow_cut : Optional[int] (default=None)
        Using a process described by Zhu & Ghodsi in their paper "Automatic
        dimensionality selection from the scree plot via the use of profile likelihood",
        truncate the dimensionality of the return on the ``elbow_cut``-th elbow.
        By default this value is ``None`` but can be used to reduce the dimensionality
        of the returned tensors.
    svd_solver_algorithm : str (default="randomized")
        allowed values: {'randomized', 'full', 'truncated'}

        SVD solver to use:

            - 'randomized'
                Computes randomized svd using
                :func:`sklearn.utils.extmath.randomized_svd`
            - 'full'
                Computes full svd using :func:`scipy.linalg.svd`
                Does not support ``graph`` input of type scipy.sparse.csr_matrix
            - 'truncated'
                Computes truncated svd using :func:`scipy.sparse.linalg.svds`
    svd_solver_iterations : int (default=5)
        Number of iterations for randomized SVD solver. Not used by 'full' or
        'truncated'. The default is larger than the default in randomized_svd
        to handle sparse matrices that may have large slowly decaying spectrum.
    svd_seed : Optional[int] (default=None)
        Used to seed the PRNG used in the ``randomized`` svd solver algorithm.
    weight_attribute : str (default="weight")
        The edge dictionary key that contains the weight of the edge.
    regularizer : Optional[numbers.Real] (default=None)
        Only used when form="R-DAD". Must be None or nonnegative.
        Constant to be added to the diagonal of degree matrix. If None, average
        node degree is added. If int or float, must be >= 0.

    Returns
    -------
    Embeddings

    Raises
    ------
    beartype.roar.BeartypeCallHintParamViolation if parameters do not match type hints
    ValueError if values are not within appropriate ranges or allowed values

    See Also
    --------
    graspologic.pipeline.embed.Embeddings
    graspologic.embed.LaplacianSpectralEmbed
    graspologic.embed.select_svd
    graspologic.utils.to_laplacian

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    Laplacian matrix of the graph. These basis vectors (in the matrices U or V) are
    ordered according to the amount of variance they explain in the original matrix.
    By selecting a subset of these basis vectors (through our choice of dimensionality
    reduction) we can find a lower dimensional space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
       Journal of the American Statistical Association, Vol. 107(499), 2012.

    .. [2] Von Luxburg, Ulrike. "A tutorial on spectral clustering," Statistics
        and computing, Vol. 17(4), pp. 395-416, 2007.

    .. [3] Rohe, Karl, Sourav Chatterjee, and Bin Yu. "Spectral clustering and
        the high-dimensional stochastic blockmodel," The Annals of Statistics,
        Vol. 39(4), pp. 1878-1915, 2011.

    .. [4] Zhu, M. and Ghodsi, A. (2006). Automatic dimensionality selection from the
        scree plot via the use of profile likelihood. Computational Statistics & Data
        Analysis, 51(2), pp.918-930.

    """
    check_argument(form in __FORMS,
                   f"form must be one of the values in {','.join(__FORMS)}")

    check_argument(dimensions >= 1, "dimensions must be positive")

    check_argument(elbow_cut is None or elbow_cut >= 1,
                   "elbow_cut must be positive")

    check_argument(
        svd_solver_algorithm in __SVD_SOLVER_TYPES,
        f"svd_solver_algorithm must be one of the values in {','.join(__SVD_SOLVER_TYPES)}",
    )

    check_argument(svd_solver_iterations >= 1,
                   "svd_solver_iterations must be positive")

    check_argument(
        svd_seed is None or 0 <= svd_seed <= 2**32 - 1,
        "svd_seed must be a nonnegative, 32-bit integer",
    )

    check_argument(
        regularizer is None or float(regularizer) >= 0,
        "regularizer must be nonnegative",
    )

    check_argument(
        not graph.is_multigraph(),
        "Multigraphs are not supported; you must determine how to represent at most "
        "one edge between any two nodes, and handle the corresponding weights "
        "accordingly",
    )

    used_weight_attribute: Optional[str] = weight_attribute
    if not is_real_weighted(graph, weight_attribute=weight_attribute):
        warnings.warn(
            f"Graphs with edges that do not have a real numeric weight set for every "
            f"{weight_attribute} attribute on every edge are treated as an unweighted "
            f"graph - which presumes all weights are `1.0`. If this is incorrect, "
            f"please add a '{weight_attribute}' attribute to every edge with a real, "
            f"numeric value (e.g. an integer or a float) and call this function again."
        )
        used_weight_attribute = None  # this supersedes what the user said, because
        # not all of the weights are real numbers, if they exist at all
        # this weight=1.0 treatment actually happens in nx.to_scipy_sparse_matrix()

    node_labels = np.array(list(graph.nodes()))

    graph_as_csr = nx.to_scipy_sparse_matrix(graph,
                                             weight=used_weight_attribute,
                                             nodelist=node_labels)

    if not is_fully_connected(graph):
        warnings.warn("More than one connected component detected")

    graph_sans_loops = remove_loops(graph_as_csr)

    ranked_graph = pass_to_ranks(graph_sans_loops)

    embedder = LaplacianSpectralEmbed(
        form=form,
        n_components=dimensions,
        n_elbows=None,  # in the short term, we do our own elbow finding
        algorithm=svd_solver_algorithm,
        n_iter=svd_solver_iterations,
        svd_seed=svd_seed,
        concat=False,
    )
    results = embedder.fit_transform(ranked_graph)
    results_arr: np.ndarray

    if elbow_cut is None:
        if isinstance(results, tuple) or graph.is_directed():
            results_arr = np.concatenate(results, axis=1)
        else:
            results_arr = results
    else:
        column_index = _index_of_elbow(embedder.singular_values_, elbow_cut)
        if isinstance(results, tuple):
            left, right = results
            left = left[:, :column_index]
            right = right[:, :column_index]
            results_arr = np.concatenate((left, right), axis=1)
        else:
            results_arr = results[:, :column_index]

    embeddings = Embeddings(node_labels, results_arr)
    return embeddings
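And a matching usage sketch for the Laplacian variant (same assumptions as the adjacency example above):

import networkx as nx
import numpy as np
from graspologic.pipeline.embed import laplacian_spectral_embedding

rng = np.random.default_rng(0)
graph = nx.erdos_renyi_graph(50, 0.2, seed=0)
for _, _, d in graph.edges(data=True):
    d["weight"] = rng.uniform()

embeddings = laplacian_spectral_embedding(graph, form="R-DAD",
                                          dimensions=8, svd_seed=0)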
Code example #11
File: multiplex.py (Project: dPys/PyNets)
def matching(
    paths,
    atlas,
    namer_dir,
):
    import glob
    import networkx as nx
    import numpy as np
    from pynets.core import thresholding
    from pynets.statistics.utils import parse_closest_ixs
    from graspologic.utils import remove_loops, symmetrize, \
        multigraph_lcc_intersection

    [dwi_graph_path, func_graph_path] = paths
    dwi_mat = np.load(dwi_graph_path)
    func_mat = np.load(func_graph_path)
    dwi_mat = thresholding.autofix(symmetrize(remove_loops(dwi_mat)))
    func_mat = thresholding.autofix(symmetrize(remove_loops(func_mat)))
    dwi_mat = thresholding.standardize(dwi_mat)
    func_mat = thresholding.standardize(func_mat)

    node_dict_dwi = parse_closest_ixs(
        glob.glob(f"{str(Path(dwi_graph_path).parent.parent)}"
                  f"/nodes/*.json"), dwi_mat.shape[0])[1]

    node_dict_func = parse_closest_ixs(
        glob.glob(f"{str(Path(func_graph_path).parent.parent)}"
                  f"/nodes/*.json"), func_mat.shape[0])[1]

    G_dwi = nx.from_numpy_array(dwi_mat)
    nx.set_edge_attributes(G_dwi, nx.get_edge_attributes(G_dwi, 'weight'),
                           name='structural')
    nx.set_node_attributes(G_dwi, dict(node_dict_dwi), name='dwi')
    #G_dwi.nodes(data=True)

    G_func = nx.from_numpy_array(func_mat)
    nx.set_edge_attributes(G_func, nx.get_edge_attributes(G_func, 'weight'),
                           name='functional')
    nx.set_node_attributes(G_func, dict(node_dict_func), name='func')
    #G_func.nodes(data=True)

    R = G_dwi.copy()
    R.remove_nodes_from(n for n in G_dwi if n not in G_func)
    R.remove_edges_from(e for e in G_dwi.edges if e not in G_func.edges)
    G_dwi = R.copy()

    R = G_func.copy()
    R.remove_nodes_from(n for n in G_func if n not in G_dwi)
    R.remove_edges_from(e for e in G_func.edges if e not in G_dwi.edges)
    G_func = R.copy()

    [G_dwi, G_func] = multigraph_lcc_intersection([G_dwi, G_func])

    def writeJSON(metadata_str, outputdir):
        import json
        import uuid
        modality = metadata_str.split('modality-')[1].split('_')[0]
        metadata_list = [
            i for i in metadata_str.split('modality-')[1].split('_')
            if '-' in i
        ]
        hash = str(uuid.uuid4())
        filename = f"{outputdir}/sidecar_modality-{modality}_{hash}.json"
        metadata_dict = {}
        for meta in metadata_list:
            k, v = meta.split('-')
            metadata_dict[k] = v
        with open(filename, 'w+') as jsonfile:
            json.dump(metadata_dict, jsonfile, indent=4)
        return hash

    dwi_name = dwi_graph_path.split("/")[-1].split(".npy")[0]
    func_name = func_graph_path.split("/")[-1].split(".npy")[0]

    dwi_hash = writeJSON(dwi_name, namer_dir)
    func_hash = writeJSON(func_name, namer_dir)

    name = f"{atlas}_mplx_layer1-dwi_ensemble-{dwi_hash}_" \
           f"layer2-func_ensemble-{func_hash}"

    dwi_opt, func_opt, best_mi = optimize_mutual_info(
        nx.to_numpy_array(G_dwi), nx.to_numpy_array(G_func), bins=50)

    func_mat_final = list(func_opt.values())[0]
    dwi_mat_final = list(dwi_opt.values())[0]
    G_dwi_final = nx.from_numpy_array(dwi_mat_final)
    G_func_final = nx.from_numpy_array(func_mat_final)

    G_multi = nx.OrderedMultiGraph(nx.compose(G_dwi_final, G_func_final))

    out_name = f"{name}_matchthr-{list(dwi_opt.keys())[0]}_" \
               f"{list(func_opt.keys())[0]}"
    mG = build_mx_multigraph(nx.to_numpy_array(G_func_final),
                             nx.to_numpy_array(G_dwi_final), out_name,
                             namer_dir)

    mG_nx = f"{namer_dir}/{out_name}.gpickle"
    nx.write_gpickle(G_multi, mG_nx)

    dwi_file_out = f"{namer_dir}/{dwi_name}.npy"
    func_file_out = f"{namer_dir}/{func_name}.npy"
    np.save(dwi_file_out, dwi_mat_final)
    np.save(func_file_out, func_mat_final)
    return mG_nx, mG, dwi_file_out, func_file_out