Example #1
    def __init__(self,
                 est_path,
                 prune,
                 norm,
                 out_fmt="gpickle",
                 remove_self_loops=True):
        import graspologic.utils as gu

        self.est_path = est_path
        self.prune = prune
        self.norm = norm
        self.out_fmt = out_fmt
        self.in_mat = None

        # Load and threshold the matrix (np, nx, thresholding, and utils
        # are assumed to be module-level imports in the source file)
        self.in_mat_raw = utils.load_mat(self.est_path)

        # Zero the diagonal, remove NaNs and Infs, and ensure edge weights
        # are positive
        self.in_mat = np.array(thresholding.autofix(np.abs(self.in_mat_raw)))

        # Remove self-loops and ensure symmetry
        if remove_self_loops:
            self.in_mat = gu.remove_loops(gu.symmetrize(self.in_mat))
        else:
            self.in_mat = gu.symmetrize(self.in_mat)

        self.in_mat[np.where(np.isnan(self.in_mat)
                             | np.isinf(self.in_mat))] = 0

        # Create nx graph
        self.G = nx.from_numpy_array(self.in_mat)
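
A minimal sketch of the cleaning step above, assuming graspologic's defaults (symmetrize() averages a matrix with its transpose; remove_loops() zeroes the diagonal):

import numpy as np
from graspologic.utils import remove_loops, symmetrize

A = np.array([[1.0, 2.0],
              [0.0, 4.0]])
S = symmetrize(A)    # averages A with A.T -> [[1., 1.], [1., 4.]]
H = remove_loops(S)  # zeroes the diagonal -> [[0., 1.], [1., 0.]]
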
Example #2
def sbm_corr_weighted(n, mu1, mu2, Sigma, directed=False, loops=False):
    """
    Parameters
    ----------
    n: list of int, shape (n_communities)
        Number of vertices in each community. Communities are assigned n[0], n[1], ...
    mu1: array-like, shape (n_communities, n_communities)
        Mean of the edge weights between each pair of communities in the
        first graph, where mu1[i, j] is the mean weight of edges between
        communities i and j.
    mu2: array-like, shape (n_communities, n_communities)
        Same as mu1, but for the second graph.
    Sigma: list or ndarray (2, 2)
        The covariance matrix encoding the variances of the edge weights of
        G1 and G2 and the covariance between them. Currently, the entire
        graph is forced to share the same variance and covariance.
    """
    n = np.array(n)
    G1 = np.zeros((np.sum(n), np.sum(n)))
    G2 = np.zeros((np.sum(n), np.sum(n)))
    block_indices = np.insert(np.cumsum(np.array(n)), 0, 0)
    for i in range(n.size):  # for each row
        for j in range(n.size):  # for each column
            g1, g2 = sample_edges_corr_weighted((n[i], n[j]), mu1[i][j],
                                                mu2[i][j], Sigma)
            G1[block_indices[i]:block_indices[i + 1],
               block_indices[j]:block_indices[j + 1]] = g1
            G2[block_indices[i]:block_indices[i + 1],
               block_indices[j]:block_indices[j + 1]] = g2
    if not directed:
        G1 = symmetrize(G1, method="triu")
        G2 = symmetrize(G2, method="triu")
    if not loops:
        G1 = G1 - np.diag(np.diag(G1))
        G2 = G2 - np.diag(np.diag(G2))
    return G1, G2
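
A minimal usage sketch for the generator above (the parameter values are illustrative): two communities of 50 vertices each, with a Sigma implying an edge-weight correlation of 0.5 / sqrt(1 * 1) = 0.5.

import numpy as np

n = [50, 50]
mu1 = [[1.0, 0.2], [0.2, 1.0]]
mu2 = [[1.0, 0.2], [0.2, 1.0]]
Sigma = np.array([[1.0, 0.5],
                  [0.5, 1.0]])
G1, G2 = sbm_corr_weighted(n, mu1, mu2, Sigma)
print(G1.shape, G2.shape)  # (100, 100) (100, 100)
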
Example #3
def er_corr_weighted(n, mu1, mu2, Sigma, directed=False, loops=False):
    """
    Generate a pair of correlated graphs with the bivariate normal distribution.
    Both G1 and G2 are non-binary matrices.
    Every pair of edges is distributed as a bivariate normal with
    mean = [mu1, mu2] and covariance matrix Sigma.
    The correlation between G1 and G2 is Sigma12 / sqrt(Sigma11 * Sigma22).

    Parameters
    ----------
    n: int
       Number of vertices
    mu1: float
        The mean of the edge weights of G1 (analogous to the marginal
        probability p in the correlated Bernoulli graph)
    mu2: float
        The mean of the edge weights of G2 (analogous to the marginal
        probability q in the correlated Bernoulli graph)
    Sigma: list or ndarray (2, 2)
        The covariance matrix encoding the variances of the edge weights of
        G1 and G2 and the covariance between them
    Returns
    -------
    G1: ndarray (n_vertices, n_vertices)
        Adjacency matrix representing a random graph.
    G2: ndarray (n_vertices, n_vertices)
        Adjacency matrix representing a random graph.
    """
    G1, G2 = sample_edges_corr_weighted((n, n), mu1, mu2, Sigma)
    if not directed:
        G1 = symmetrize(G1, method="triu")
        G2 = symmetrize(G2, method="triu")
    if not loops:
        G1 = G1 - np.diag(np.diag(G1))
        G2 = G2 - np.diag(np.diag(G2))
    return G1, G2
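
A quick sanity check (illustrative values): with Sigma12 = 0.8 and unit variances, the off-diagonal weights of G1 and G2 should correlate at roughly 0.8.

import numpy as np

Sigma = np.array([[1.0, 0.8],
                  [0.8, 1.0]])
G1, G2 = er_corr_weighted(100, 0.0, 0.0, Sigma)
iu = np.triu_indices_from(G1, k=1)
print(np.corrcoef(G1[iu], G2[iu])[0, 1])  # ~0.8
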
Example #4
    def _gen_mat_data(n: int = 20,
                      m: int = 20,
                      p: float = 0.50,
                      mat_type: str = 'sb',
                      binary: bool = False,
                      asfile: bool = True,
                      n_graphs: int = 1):
        if binary:
            wt = 1
        else:
            wt = np.random.uniform

        mat_list = []
        mat_file_list = []
        for nm in range(n_graphs):
            if mat_type == 'er':
                mat = largest_connected_component(
                    symmetrize(
                        remove_loops(
                            er_nm(n,
                                  m,
                                  wt=wt,  # honor the binary flag
                                  wtargs=dict(low=0, high=1)))))
            elif mat_type == 'sb':
                if p is None:
                    raise ValueError(
                        f"for mat_type {mat_type}, p cannot be None")
                mat = largest_connected_component(
                    symmetrize(
                        remove_loops(
                            sbm(np.array([n]),
                                np.array([[p]]),
                                wt=wt,
                                wtargs=dict(low=0, high=1)))))
            else:
                raise ValueError(f"mat_type {mat_type} not recognized!")

            mat_list.append(mat)

            if asfile:
                mat_path_tmp = tempfile.NamedTemporaryFile(mode='w+',
                                                           suffix='.npy',
                                                           delete=False)
                mat_path = str(mat_path_tmp.name)
                np.save(mat_path, mat)
                mat_file_list.append(mat_path)
                mat_path_tmp.close()

        return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
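
A minimal call sketch (assuming the helper's module-level imports are in scope); with asfile=False only mat_list is populated:

out = _gen_mat_data(n=30, p=0.4, mat_type='sb', binary=False,
                    asfile=False, n_graphs=2)
print(len(out['mat_list']), out['mat_list'][0].shape)
print(out['mat_file_list'])  # [] when asfile=False
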
Example #5
    def test_eigsh(self):
        np.random.seed(123)
        X = np.vstack([
            np.repeat([[0.2, 0.2, 0.2]], 50, axis=0),
            np.repeat([[0.5, 0.5, 0.5]], 50, axis=0),
        ])
        P = X @ X.T
        A = np.random.binomial(1, P).astype(float)  # np.float was removed from NumPy
        A = symmetrize(A, method="triu")
        n_components = 3

        # Full SVD
        U_full, D_full, V_full = select_svd(A,
                                            n_components=n_components,
                                            algorithm="full")
        X_full = U_full @ np.diag(np.sqrt(D_full))
        _, _, norm_full = procrustes(X, X_full)

        # eigsh SVD
        U_square, D_square, V_square = select_svd(A,
                                                  n_components=n_components,
                                                  algorithm="eigsh",
                                                  n_iter=10)
        X_square = U_square @ np.diag(np.sqrt(D_square))
        _, _, norm_square = procrustes(X, X_square)

        rtol = 1e-4
        atol = 1e-4
        np.testing.assert_allclose(norm_full, norm_square, rtol, atol)
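
The procrustes alignment above is needed because latent positions recovered from A are identifiable only up to an orthogonal transform; assuming procrustes here is scipy.spatial.procrustes (which matches the three-value return), a rotated copy of X scores a disparity near zero:

import numpy as np
from scipy.spatial import procrustes

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
Q, _ = np.linalg.qr(rng.normal(size=(3, 3)))  # random orthogonal matrix
_, _, disparity = procrustes(X, X @ Q)
print(disparity)  # ~0: X and X @ Q differ only by a rotation
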
Example #6
    def _gen_mat_data(n: int=20, m: int=20, p: float=0.50,
                      mat_type: str='sb', binary: bool=False,
                      asfile: bool=True, n_graphs: int=1,
                      lcc: bool=False, modality: str='func'):
        if binary:
            wt = 1
        else:
            wt = np.random.uniform

        mat_list = []
        mat_file_list = []

        if n_graphs > 0:
            for nm in range(n_graphs):
                if mat_type == 'er':
                    mat = symmetrize(
                        remove_loops(er_nm(n, m, wt=wt,  # honor binary flag
                                           wtargs=dict(low=0, high=1))))
                elif mat_type == 'sb':
                    if p is None:
                        raise ValueError(
                            f"for mat_type {mat_type}, p cannot be None")
                    mat = symmetrize(
                        remove_loops(sbm(np.array([n]), np.array([[p]]),
                                         wt=wt, wtargs=dict(low=0,
                                                            high=1))))
                else:
                    raise ValueError(f"mat_type {mat_type} not recognized!")

                if lcc:
                    mat = largest_connected_component(mat)

                mat_list.append(autofix(mat))

                if asfile:
                    path_tmp = tempfile.NamedTemporaryFile(mode='w+',
                                                           suffix='.npy',
                                                           delete=False)
                    mat_path_tmp = str(path_tmp.name)
                    out_folder = f"{str(Path.home())}/test_mats"
                    os.makedirs(out_folder, exist_ok=True)

                    if modality == 'func':
                        mat_path = f"{out_folder}/graph_sub-999_modality-" \
                                   f"func_model-corr_template-MNI152_2mm_" \
                                   f"parc_tol-6fwhm_hpass-0Hz_" \
                                   f"signal-mean_thrtype-prop_thr-" \
                                   f"{round(random.uniform(0, 1), 2)}.npy"
                    elif modality == 'dwi':
                        # NOTE: the original wrote "modality-func" here too,
                        # which looks like a copy-paste slip
                        mat_path = f"{out_folder}/graph_sub-999_modality-" \
                                   f"dwi_model-csa_template-MNI152_2mm_" \
                                   f"tracktype-local_traversal-det_" \
                                   f"minlength-30_tol-5_thrtype-prop_thr-" \
                                   f"{round(random.uniform(0, 1), 2)}.npy"
                    else:
                        raise ValueError(
                            f"modality {modality} not recognized!")

                    # np.save writes mat_path directly; the temp file above
                    # only reserves a unique name, so no copy is needed
                    np.save(mat_path, mat)
                    mat_file_list.append(mat_path)
                    path_tmp.close()

        return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
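
A call sketch for this variant (the output paths depend on the user's home directory): lcc=True post-processes with largest_connected_component, and modality selects the BIDS-like filename template used when asfile=True.

out = _gen_mat_data(n=30, p=0.4, mat_type='sb', lcc=True,
                    asfile=True, modality='dwi', n_graphs=1)
print(out['mat_file_list'][0])  # ~/test_mats/graph_sub-999_modality-dwi_...npy
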
Example #7
alpha = 0.7
for source_node in anytree.PreOrderIter(root):
    for target_node in anytree.PreOrderIter(root):
        if source_node.is_leaf and target_node.is_leaf:
            nca = nearest_common_ancestor(source_node, target_node).name
            base_prob = probs[nca]
            new_prob = np.random.uniform(base_prob - alpha * base_prob,
                                         base_prob + alpha * base_prob)
            i = source_node.name
            j = target_node.name
            sbm_probs.loc[i, j] = new_prob

from graspologic.utils import symmetrize

sbm_probs = sbm_probs.values
sbm_probs = symmetrize(sbm_probs)
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
adjplot(sbm_probs, ax=ax)
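
A hedged continuation: once symmetrized, the leaf-by-leaf probability matrix can seed graspologic's SBM sampler, treating each leaf as a block (the block sizes here are hypothetical, and clipping guards against the perturbation pushing probabilities outside [0, 1]):

from graspologic.simulations import sbm

P = np.clip(sbm_probs, 0, 1)
community_sizes = [20] * P.shape[0]  # hypothetical: 20 nodes per leaf
A = sbm(community_sizes, P)
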

# %%
flat_labels = []
node_data = mt.node_data
for node, row in node_data.iterrows():
    path = row.values[:4]
    path = path[~np.isnan(path)]
    label = path[-1]
    flat_labels.append(label)

flat_labels = np.array(flat_labels)

#%%
Example #8
    U, D, Vt = selectSVD(X, n_components=n_components)
    return U, Vt.T


#%%
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd

from graspologic.utils import pass_to_ranks, get_lcc, symmetrize

data_dir = Path("sparse_new_basis/data/maggot")
g = nx.read_weighted_edgelist(
    data_dir / "G.edgelist", create_using=nx.DiGraph, nodetype=int
)
meta = pd.read_csv(data_dir / "meta_data.csv", index_col=0)
adj = nx.to_numpy_array(g, nodelist=meta.index)
adj = symmetrize(adj)
adj, inds = get_lcc(adj, return_inds=True)
meta = meta.iloc[inds]

hemisphere = "left"
if hemisphere == "left":
    meta["inds"] = np.arange(len(meta))
    meta = meta[meta["left"]]
    inds = meta["inds"]
    adj = adj[np.ix_(inds, inds)]
# TODO just try with one hemisphere

preprocessing = "ptr"
if preprocessing == "ptr":
    adj_to_embed = pass_to_ranks(adj)
elif preprocessing == "sqrt":
    adj_to_embed = np.sqrt(adj)  # assumed; the source excerpt truncates here
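
For context on the "ptr" branch above: graspologic's pass_to_ranks replaces nonzero edge weights with their normalized ranks (zeros stay zero), which makes the embedding robust to heavy-tailed weights. A tiny demonstration:

W = np.array([[0., 1., 100.],
              [1., 0., 5.],
              [100., 5., 0.]])
print(pass_to_ranks(W))  # ranks in (0, 1); the 100s remain the largest
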
Example #9
def motif_matching(
    paths,
    ID,
    atlas,
    namer_dir,
    name_list,
    metadata_list,
    multigraph_list_all,
    graph_path_list_all,
    rsn=None,
):
    import networkx as nx
    import numpy as np
    import glob
    import pickle
    from pynets.core import thresholding
    from pynets.stats.netmotifs import compare_motifs
    from sklearn.metrics.pairwise import cosine_similarity
    from pynets.stats.netstats import community_resolution_selection
    from graspologic.utils import remove_loops, symmetrize
    from pynets.core.nodemaker import get_brainnetome_node_attributes

    [struct_graph_path, func_graph_path] = paths
    struct_mat = np.load(struct_graph_path)
    func_mat = np.load(func_graph_path)

    [struct_coords, struct_labels, struct_label_intensities] = \
        get_brainnetome_node_attributes(glob.glob(
            f"{str(Path(struct_graph_path).parent.parent)}/nodes/*.json"),
        struct_mat.shape[0])

    [func_coords, func_labels, func_label_intensities] = \
        get_brainnetome_node_attributes(glob.glob(
            f"{str(Path(func_graph_path).parent.parent)}/nodes/*.json"),
        func_mat.shape[0])

    # Find intersecting nodes across modalities (i.e. assuming the same
    # parcellation, but accommodating the possibility of dropped nodes)
    diff1 = list(set(struct_label_intensities) - set(func_label_intensities))
    diff2 = list(set(func_label_intensities) - set(struct_label_intensities))
    G_struct = nx.from_numpy_array(struct_mat)
    G_func = nx.from_numpy_array(func_mat)

    bad_idxs = []
    for val in diff1:
        bad_idxs.append(struct_label_intensities.index(val))
    bad_idxs = sorted(set(bad_idxs), reverse=True)
    if type(struct_coords) is np.ndarray:
        struct_coords = list(tuple(x) for x in struct_coords)
    for j in bad_idxs:
        G_struct.remove_node(j)
        print(f"Removing: {(struct_labels[j], struct_coords[j])}...")
        del struct_labels[j], struct_coords[j]

    bad_idxs = []
    for val in diff2:
        bad_idxs.append(func_label_intensities.index(val))
    bad_idxs = sorted(set(bad_idxs), reverse=True)
    if type(func_coords) is np.ndarray:
        func_coords = list(tuple(x) for x in func_coords)
    for j in bad_idxs:
        G_func.remove_node(j)
        print(f"Removing: {(func_labels[j], func_coords[j])}...")
        del func_labels[j], func_coords[j]

    struct_mat = nx.to_numpy_array(G_struct)
    func_mat = nx.to_numpy_array(G_func)

    struct_mat = thresholding.autofix(symmetrize(remove_loops(struct_mat)))

    func_mat = thresholding.autofix(symmetrize(remove_loops(func_mat)))

    if func_mat.shape == struct_mat.shape:
        func_mat[~struct_mat.astype("bool")] = 0
        struct_mat[~func_mat.astype("bool")] = 0
        print(
            "Edge disagreements after matching: ",
            sum(sum(abs(func_mat - struct_mat))),
        )

        metadata = {}
        assert (len(struct_coords) == len(struct_labels) == len(func_coords) ==
                len(func_labels) == func_mat.shape[0])
        metadata["coords"] = struct_coords
        metadata["labels"] = struct_labels
        metadata_list.append(metadata)

        struct_mat = np.maximum(struct_mat, struct_mat.T)
        func_mat = np.maximum(func_mat, func_mat.T)
        struct_mat = thresholding.standardize(struct_mat)
        func_mat = thresholding.standardize(func_mat)

        struct_node_comm_aff_mat = community_resolution_selection(
            nx.from_numpy_array(np.abs(struct_mat)))[1]

        func_node_comm_aff_mat = community_resolution_selection(
            nx.from_numpy_array(np.abs(func_mat)))[1]

        struct_comms = []
        for i in np.unique(struct_node_comm_aff_mat):
            struct_comms.append(struct_node_comm_aff_mat == i)

        func_comms = []
        for i in np.unique(func_node_comm_aff_mat):
            func_comms.append(func_node_comm_aff_mat == i)

        sims = cosine_similarity(struct_comms, func_comms)
        try:
            struct_comm = struct_comms[np.argmax(sims, axis=0)[0]]
        except BaseException:
            print('Matching by structural communities failed...')
            struct_comm = struct_mat
        try:
            func_comm = func_comms[np.argmax(sims, axis=0)[0]]
        except BaseException:
            print('Matching by functional communities failed...')
            func_comm = func_mat

        comm_mask = np.equal.outer(struct_comm, func_comm).astype(bool)

        try:
            assert comm_mask.shape == struct_mat.shape == func_mat.shape
        except AssertionError as e:
            e.args += (comm_mask, comm_mask.shape, struct_mat,
                       struct_mat.shape, func_mat, func_mat.shape)
            raise  # without this, the augmented error was silently discarded

        try:
            struct_mat[~comm_mask] = 0
        except BaseException:
            print('Skipping community masking...')
        try:
            func_mat[~comm_mask] = 0
        except BaseException:
            print('Skipping community masking...')

        struct_name = struct_graph_path.split("/rawgraph_")[-1].split(
            ".npy")[0]
        func_name = func_graph_path.split("/rawgraph_")[-1].split(".npy")[0]
        name = f"sub-{ID}_{atlas}_mplx_Layer-1_{struct_name}_" \
               f"Layer-2_{func_name}"
        name_list.append(name)
        struct_mat = np.maximum(struct_mat, struct_mat.T)
        func_mat = np.maximum(func_mat, func_mat.T)
        try:
            [mldict, g_dict] = compare_motifs(struct_mat, func_mat, name,
                                              namer_dir)
        except BaseException:
            print(f"Adaptive thresholding by motif comparisons failed "
                  f"for {name}. This usually happens when no motifs are found")
            return [], [], [], []

        multigraph_list_all.append(list(mldict.values())[0])
        graph_path_list = []
        for thr in list(g_dict.keys()):
            multigraph_path_list_dict = {}
            [struct, func] = g_dict[thr]
            struct_out = f"{namer_dir}/struct_{atlas}_{struct_name}.npy"
            # the original wrote a "struct_" prefix here too, which looks
            # like a copy-paste slip
            func_out = f"{namer_dir}/func_{atlas}_{func_name}_" \
                       f"motif-{thr}.npy"
            np.save(struct_out, struct)
            np.save(func_out, func)
            multigraph_path_list_dict[f"struct_{atlas}_{thr}"] = struct_out
            multigraph_path_list_dict[f"func_{atlas}_{thr}"] = func_out
            graph_path_list.append(multigraph_path_list_dict)
        graph_path_list_all.append(graph_path_list)
    else:
        print(
            f"Skipping {rsn} rsn, since structural and functional graphs are "
            f"not identical shapes.")

    return name_list, metadata_list, multigraph_list_all, graph_path_list_all
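
The comm_mask construction above relies on np.equal.outer: entry (i, j) is True exactly when node i's structural-community flag equals node j's functional one. A minimal illustration:

import numpy as np

struct_comm = np.array([True, True, False])
func_comm = np.array([True, False, False])
print(np.equal.outer(struct_comm, func_comm).astype(int))
# [[1 0 0]
#  [1 0 0]
#  [0 1 1]]
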
Example #10

colors = list(map(get_rgb, cc.glasbey_light))
color_objs = [sRGBColor(*rgb) for rgb in colors]
color_objs = [convert_color(x, LabColor) for x in color_objs]

color_pairs = cartesian_product(color_objs, color_objs)

color_dist_mat = np.empty((len(colors), len(colors)))
for i, color1 in enumerate(color_objs):
    for j, color2 in enumerate(color_objs):
        dist = delta_e_cie2000(color1, color2)
        color_dist_mat[i, j] = dist

print(is_almost_symmetric(color_dist_mat))
color_dist_mat = symmetrize(color_dist_mat)
#%%
Z = linkage(squareform(color_dist_mat), method="average")
sns.clustermap(
    color_dist_mat,
    row_colors=colors,
    col_colors=colors,
    row_linkage=Z,
    col_linkage=Z,
    xticklabels=False,
    yticklabels=False,
)
stashfig("clustermap")
# %%

cmds = ClassicalMDS(n_components=2, dissimilarity="precomputed")
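
A hedged continuation of the excerpt: with dissimilarity="precomputed", fit_transform embeds the color-distance matrix directly, and the 2-D coordinates can be scattered with the same palette:

embedding = cmds.fit_transform(color_dist_mat)
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.scatter(embedding[:, 0], embedding[:, 1], color=colors)
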
Example #11
def matching(
    paths,
    atlas,
    namer_dir,
):
    import glob
    import networkx as nx
    import numpy as np
    from pynets.core import thresholding
    from pynets.statistics.utils import parse_closest_ixs
    from graspologic.utils import remove_loops, symmetrize, \
        multigraph_lcc_intersection

    [dwi_graph_path, func_graph_path] = paths
    dwi_mat = np.load(dwi_graph_path)
    func_mat = np.load(func_graph_path)
    dwi_mat = thresholding.autofix(symmetrize(remove_loops(dwi_mat)))
    func_mat = thresholding.autofix(symmetrize(remove_loops(func_mat)))
    dwi_mat = thresholding.standardize(dwi_mat)
    func_mat = thresholding.standardize(func_mat)

    node_dict_dwi = parse_closest_ixs(
        glob.glob(f"{str(Path(dwi_graph_path).parent.parent)}"
                  f"/nodes/*.json"), dwi_mat.shape[0])[1]

    node_dict_func = parse_closest_ixs(
        glob.glob(f"{str(Path(func_graph_path).parent.parent)}"
                  f"/nodes/*.json"), func_mat.shape[0])[1]

    G_dwi = nx.from_numpy_array(dwi_mat)
    # copy the edge weights into a 'structural' attribute (the original
    # passed set_edge_attributes' arguments in the wrong order)
    nx.set_edge_attributes(G_dwi, nx.get_edge_attributes(G_dwi, 'weight'),
                           name='structural')
    nx.set_node_attributes(G_dwi, dict(node_dict_dwi), name='dwi')
    #G_dwi.nodes(data=True)

    G_func = nx.from_numpy_array(func_mat)
    # likewise for the functional layer
    nx.set_edge_attributes(G_func, nx.get_edge_attributes(G_func, 'weight'),
                           name='functional')
    nx.set_node_attributes(G_func, dict(node_dict_func), name='func')
    #G_func.nodes(data=True)

    R = G_dwi.copy()
    R.remove_nodes_from(n for n in G_dwi if n not in G_func)
    R.remove_edges_from(e for e in G_dwi.edges if e not in G_func.edges)
    G_dwi = R.copy()

    R = G_func.copy()
    R.remove_nodes_from(n for n in G_func if n not in G_dwi)
    R.remove_edges_from(e for e in G_func.edges if e not in G_dwi.edges)
    G_func = R.copy()

    [G_dwi, G_func] = multigraph_lcc_intersection([G_dwi, G_func])

    def writeJSON(metadata_str, outputdir):
        import json
        import uuid
        modality = metadata_str.split('modality-')[1].split('_')[0]
        metadata_list = [
            i for i in metadata_str.split('modality-')[1].split('_')
            if '-' in i
        ]
        file_hash = str(uuid.uuid4())  # avoid shadowing the builtin `hash`
        filename = f"{outputdir}/sidecar_modality-{modality}_{file_hash}.json"
        metadata_dict = {}
        for meta in metadata_list:
            k, v = meta.split('-')
            metadata_dict[k] = v
        with open(filename, 'w+') as jsonfile:
            json.dump(metadata_dict, jsonfile, indent=4)
        return file_hash

    dwi_name = dwi_graph_path.split("/")[-1].split(".npy")[0]
    func_name = func_graph_path.split("/")[-1].split(".npy")[0]

    dwi_hash = writeJSON(dwi_name, namer_dir)
    func_hash = writeJSON(func_name, namer_dir)

    name = f"{atlas}_mplx_layer1-dwi_ensemble-{dwi_hash}_" \
           f"layer2-func_ensemble-{func_hash}"

    dwi_opt, func_opt, best_mi = optimize_mutual_info(
        nx.to_numpy_array(G_dwi), nx.to_numpy_array(G_func), bins=50)

    func_mat_final = list(func_opt.values())[0]
    dwi_mat_final = list(dwi_opt.values())[0]
    G_dwi_final = nx.from_numpy_array(dwi_mat_final)
    G_func_final = nx.from_numpy_array(func_mat_final)

    # nx.OrderedMultiGraph was removed in networkx 3.x; MultiGraph preserves
    # insertion order on modern Python
    G_multi = nx.MultiGraph(nx.compose(G_dwi_final, G_func_final))

    out_name = f"{name}_matchthr-{list(dwi_opt.keys())[0]}_" \
               f"{list(func_opt.keys())[0]}"
    mG = build_mx_multigraph(nx.to_numpy_array(G_func_final),
                             nx.to_numpy_array(G_dwi_final), out_name,
                             namer_dir)

    mG_nx = f"{namer_dir}/{out_name}.gpickle"
    # nx.write_gpickle was removed in networkx 3.x; pickling directly is the
    # documented replacement
    import pickle
    with open(mG_nx, 'wb') as f:
        pickle.dump(G_multi, f, pickle.HIGHEST_PROTOCOL)

    dwi_file_out = f"{namer_dir}/{dwi_name}.npy"
    func_file_out = f"{namer_dir}/{func_name}.npy"
    np.save(dwi_file_out, dwi_mat_final)
    np.save(func_file_out, func_mat_final)
    return mG_nx, mG, dwi_file_out, func_file_out
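
For reference, the nested writeJSON above keys its sidecar off `modality-` and any underscore-separated `key-value` tokens that follow; a standalone sketch of that parsing (the name below is hypothetical):

name = "graph_sub-999_modality-func_model-corr_thr-0.3"  # hypothetical
tokens = name.split('modality-')[1].split('_')
print(tokens[0])  # 'func'
print(dict(t.split('-') for t in tokens[1:] if '-' in t))
# {'model': 'corr', 'thr': '0.3'}
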