Ejemplo n.º 1
0
def mcl(graph, viz=False):
    """Cluster ``graph`` with MCL, keeping the best-modularity inflation.

    Inflation values from 1.2 up to (but excluding) 3, in steps of 0.1, are
    tried. For each one the modularity of the resulting clustering is
    computed, and the clustering with the highest modularity is kept.

    Args:
        graph: networkx graph to cluster.
        viz: when false, return the clusters; when true, draw them instead.

    Returns:
        When ``viz`` is false, a list of clusters, each a list of node labels
        taken from ``graph.nodes()``. When ``viz`` is true the clusters are
        drawn on a new figure and ``None`` is returned.
    """
    mat = nx.to_numpy_matrix(graph)

    best_q = -1
    best_clusters = []  # stays empty only if no clustering scores above -1

    for val in np.arange(1.2, 3, 0.1):
        res = mc.run_mcl(mat, inflation=val)
        clust = mc.get_clusters(res)
        q = mc.modularity(matrix=np.asmatrix(res), clusters=clust)
        if q > best_q:
            # Record the score too; otherwise every later candidate with
            # q > -1 would overwrite the best clustering found so far.
            best_q = q
            best_clusters = clust

    if not viz:
        # Clusters hold matrix indices; translate them back to node labels.
        labels = dict(enumerate(graph.nodes()))
        return [[labels.get(item) for item in cluster]
                for cluster in best_clusters]

    plt.figure(num=None, figsize=(20, 20), dpi=50)
    mc.draw_graph(mat, best_clusters, node_size=200, with_labels=False,
                  edge_color="silver")
Ejemplo n.º 2
0
def plot_clusters(filename, matrix, clusters, height=5, width=7, dpi=300):
    """Draw the clustered graph on a fresh figure and save it to ``filename``.

    Args:
        filename: destination passed to ``Figure.savefig``.
        matrix: adjacency matrix handed to ``mc.draw_graph``.
        clusters: clusters handed to ``mc.draw_graph``.
        height: figure height.
        width: figure width.
        dpi: resolution used when saving.
    """
    from matplotlib import pyplot as plt

    figure, axes = plt.subplots(figsize=(width, height))

    draw_options = {
        "node_size": 50,
        "with_labels": False,
        "edge_color": "silver",
        "ax": axes,
    }
    mc.draw_graph(matrix, clusters, **draw_options)

    figure.savefig(filename, dpi=dpi)
Ejemplo n.º 3
0
def export_node_list_withimage(encode_data,
                               name_data,
                               matrix_form=False,
                               thresh=5,
                               inflation=1.5,
                               header=True,
                               delim=',',
                               label=None,
                               filename='node_list.csv'):
    """Cluster the data with MCL, draw the result and export a node list.

    Each output row has the form
    ``"<label>"<delim>"<label up to the first '!'>"<delim><cluster id><delim>"<image>.png"``
    where cluster ids start at 1.

    Args:
        encode_data: when ``matrix_form`` is true, the adjacency matrix
            itself; otherwise a sequence of node positions from which a
            random geometric graph is built.
        name_data: per-node strings from which the image name is cut: the
            text starting 4 characters after the first ``'RTW'`` with the
            last 4 characters removed.
        matrix_form: whether ``encode_data`` is already a matrix.
        thresh: radius given to ``nx.random_geometric_graph``.
        inflation: MCL inflation parameter.
        header: whether to write a header row.
        delim: field delimiter for the header and the data rows.
        label: per-node label strings; defaults to the node indices as text.
        filename: path of the file to write.
    """
    print('building graph')
    if matrix_form:
        numnodes = encode_data.shape[0]
        matrix = encode_data
    else:
        numnodes = len(encode_data)
        positions = {i: encode_data[i] for i in range(numnodes)}
        # use networkx to generate the graph
        network = nx.random_geometric_graph(numnodes, thresh, pos=positions)

        # then get the adjacency matrix (in sparse form)
        matrix = nx.to_scipy_sparse_matrix(network)

    print('runing mcl')
    result = mc.run_mcl(matrix, inflation=inflation)
    clusters = mc.get_clusters(result)
    mc.draw_graph(matrix,
                  clusters,
                  node_size=50,
                  with_labels=False,
                  edge_color="silver")

    # ``is None`` also works when ``label`` is an array, where ``== None``
    # would compare element-wise.
    if label is None:
        label = [str(i) for i in range(numnodes)]

    # Honour ``filename`` (previously ignored) and close the file on error.
    with open(filename, 'w') as f:
        if header:
            f.write(delim.join(("Id", "Label", "Cluster-ID", "image")) + "\n")
        for cluster_id, members in enumerate(clusters, start=1):
            for node in members:
                node_label = label[node]
                # partition keeps the whole label when it has no '!';
                # slicing with find()'s -1 used to drop the last character.
                short_label = node_label.partition('!')[0]
                # NOTE(review): if 'RTW' is absent, find() returns -1 and the
                # slice starts at index 3 - confirm that input cannot occur.
                pos2 = name_data[node].find('RTW')
                image = name_data[node][pos2 + 4:-4] + ".png"
                fields = ("\"" + node_label + "\"",
                          "\"" + short_label + "\"",
                          str(cluster_id),
                          "\"" + image + "\"")
                f.write(delim.join(fields) + "\n")
Ejemplo n.º 4
0
def graph_clustering(graph, cluster_rate=1.5, draw=False):
    """Cluster ``graph`` with MCL and tag every node with its cluster number.

    The number of clusters is printed and stored in
    ``graph.graph['Total_AS']``; each node receives an ``AS_N`` attribute
    holding the zero-based index of its cluster.
    ("AS" presumably stands for autonomous system - confirm with the caller.)

    Args:
        graph: networkx graph; modified in place.
        cluster_rate: MCL inflation parameter.
        draw: when true, draw the clustering and show the figure.
    """
    n_Matrix = nx.to_scipy_sparse_matrix(graph)
    result = mc.run_mcl(n_Matrix, inflation=cluster_rate)
    clusters = mc.get_clusters(result)
    print("Number of AS: " + str(len(clusters)))
    graph.graph['Total_AS'] = len(clusters)

    # Cluster members are row indices of the adjacency matrix, whose rows
    # follow the order of graph.nodes(). Map them back to the real node ids
    # so graphs not labelled 0..n-1 are tagged correctly and no spurious
    # nodes get created.
    node_by_index = list(graph.nodes())
    for as_num, members in enumerate(clusters):
        for n_id in members:
            # add_node on an existing node only updates its attributes.
            graph.add_node(node_by_index[n_id], AS_N=as_num)

    if draw:
        mc.draw_graph(n_Matrix, clusters,
                      node_size=10, with_labels=False, edge_color="black",
                      width=0.2)
        plt.show()
Ejemplo n.º 5
0
def cluster_plot(adjacent,
                 title,
                 pos,
                 inflation=1.3,
                 filename=None,
                 labels=None,
                 label_flag=True,
                 node_size=150,
                 figsize=(6, 6),
                 use_nodeaslabel=False,
                 width=1):
    """Run MCL on ``adjacent`` and plot the clustered graph.

    Args:
        adjacent: adjacency matrix to cluster and draw.
        title: title of the plot.
        pos: node positions; when ``None`` a spring layout is computed.
        inflation: MCL inflation parameter.
        filename: when given, the figure is saved there; otherwise it is
            shown without blocking.
        labels: node-to-label mapping; when ``None`` it is derived from the
            nodes or the clusters (see ``use_nodeaslabel``).
        label_flag: whether to draw the labels.
        node_size: node size passed to ``mc.draw_graph``.
        figsize: figure size.
        use_nodeaslabel: when ``labels`` is ``None``, label each node with
            itself if true, or with its cluster index if false.
        width: width passed to ``mc.draw_graph``.

    Returns:
        Tuple ``(clusters, pos, labels)`` as actually used for the plot.
    """
    plt.subplots(1, 1, sharey=False, sharex=False, figsize=figsize)
    result = mc.run_mcl(adjacent, inflation=inflation)
    clusters = mc.get_clusters(result)  # get clusters
    plt.title(title)
    graph = nx.Graph(adjacent)
    # complete_cluster is defined elsewhere; presumably it adds nodes that
    # are missing from every cluster - TODO confirm.
    clusters = complete_cluster(clusters, graph.nodes())
    if pos is None:
        pos = nx.spring_layout(graph, iterations=100)
    mc.draw_graph(adjacent,
                  clusters,
                  pos=pos,
                  with_labels=False,
                  edge_color="silver",
                  node_size=node_size,
                  width=width)
    if labels is None:
        if use_nodeaslabel:
            labels = {n: n for n in graph.nodes()}
        else:
            labels = {n: ci for ci, c in enumerate(clusters) for n in c}
    # pos is always set by now, so no second layout fallback is needed.
    if label_flag:
        nx.draw_networkx_labels(graph, pos, labels=labels)
    if filename is not None:
        plt.savefig(filename, bbox_inches='tight')
    else:
        plt.show(block=False)
    return clusters, pos, labels
Ejemplo n.º 6
0
def draw_mcl_clustering(clusters, matrix, network):
    '''
    Draw an MCL clustering using a graphviz layout of ``network``.

    The layout is keyed by node name, whereas ``mc.draw_graph`` is given
    positions keyed by node index, so the names are converted to their
    position in ``network.nodes()``.
    (Assumes ``matrix`` rows follow the order of ``network.nodes()`` -
    TODO confirm against the caller.)

    :param clusters: clusters to draw
    :param matrix: adjacency matrix passed to ``mc.draw_graph``
    :param network: networkx graph used to compute the layout
    :return: None
    '''
    pos = nx.nx_pydot.graphviz_layout(network)

    # Build the name -> index map once rather than calling list.index() for
    # every node, which is quadratic in the number of nodes.
    index_of = {node: idx for idx, node in enumerate(network.nodes())}
    pos_index = {index_of[node]: xy for node, xy in pos.items()}

    mc.draw_graph(matrix,
                  clusters,
                  pos=pos_index,
                  node_size=50,
                  with_labels=False,
                  edge_color="silver")
Ejemplo n.º 7
0
import numpy as np
import markov_clustering as mc
import networkx as nx
import pandas as pd

# Read the edge list. Passing the path lets pandas open and close the file
# itself; the previous open(...) handle was never closed.
file = pd.read_excel('D:/LEARN/GRADUATE2020/IDA/assignment3/mcl.xlsx',
                     sheet_name='Sheet3')
matrix = np.array(file)

# Each row is used as (node, node, weight).
g = nx.Graph()
innum = 2.1  # inflation value; change it to explore other clusterings
for i in matrix:
    g.add_edge(i[0], i[1], weight=i[2])
a = nx.to_numpy_array(g)

# Run MCL once on the adjacency matrix, then 9 more times on its own output.
result = mc.run_mcl(a, inflation=innum)
for j in range(9):
    result = mc.run_mcl(result, inflation=innum)

clusters = mc.get_clusters(result)
# The graph is drawn from the MCL result matrix, not from the original
# adjacency matrix.
mc.draw_graph(result, clusters, with_labels=True)
Ejemplo n.º 8
0
import markov_clustering as mc
import pandas as pd
import networkx as nx
import random
import matplotlib.pyplot as plt

# Node count of the data set (nothing below reads this value).
numnodes = 45

# Parse the comma-separated weighted edge list into a graph.
with open('data-weight.csv', 'rb') as f:
    G = nx.read_weighted_edgelist(f, delimiter=',')

# Sparse adjacency matrix whose entries are the edge weights.
matrix = nx.to_scipy_sparse_matrix(G, weight='weight')

# Cluster with an inflation of 2.5 and extract the clusters.
result = mc.run_mcl(matrix, inflation=2.5)
clusters = mc.get_clusters(result)

mc.draw_graph(
    matrix, clusters, node_size=50, with_labels=False, edge_color="silver"
)
plt.title("Weighted Markov Clustering Algorithm")
plt.show()
Ejemplo n.º 9
0
Archivo: net.py Proyecto: a3609640/Test
def plot_EIF4F_coexp_network (x):
    """Plot the EIF4F interaction networks and a sample MCL clustering.

    Four matplotlib figures are created, in this order:

    1. the whole network built from ``EIF4F_InterPro_Net``, with proteins
       whose degree exceeds 20 highlighted in orange;
    2. its largest connected component, with nodes coloured by degree and
       sized by betweenness centrality;
    3. the multigraph built from ``melted_df_r``;
    4. an MCL clustering of a random geometric graph with 200 nodes.

    Args:
        x: passed unchanged to ``EIF4F_inter_coexp`` (defined elsewhere).
    """
    EIF4F_InterPro_Net, melted_df_r, melted_df_b = EIF4F_inter_coexp(x)

    # ---- Figure 1: full interaction network -------------------------------
    G = nx.from_pandas_edgelist(EIF4F_InterPro_Net,
                                'protein1',
                                'protein2',
                                edge_attr=['experimental', 'database'])
    G.add_nodes_from(nodes_for_adding=EIF4F_InterPro_Net.protein1.tolist())
    protein = list(EIF4F_InterPro_Net.protein1.unique())

    # Label every node with its own name.
    labels = {node: node for node in G.nodes()}

    fig, ax = plt.subplots(figsize=(20, 20))
    pos = nx.kamada_kawai_layout(G)
    # Draw every protein
    nx.draw_networkx_nodes(G,
                           pos,
                           ax=ax,
                           label=True,
                           node_color='#cccccc',
                           node_size=100)
    # Highlight the highly connected proteins on the same axes.
    popular_protein = [item for item in protein if G.degree(item) > 20]
    nx.draw_networkx_nodes(G, pos,
                           ax=ax,
                           nodelist=popular_protein,
                           node_color='orange',
                           node_size=100)
    nx.draw_networkx_edges(G, pos, ax=ax,
                           width=1,
                           edge_color="#cccccc")
    _ = nx.draw_networkx_labels(G, pos, labels, ax=ax)

    # ---- Figure 2: largest connected component ----------------------------
    # Pick the connected component with the most nodes.
    components = nx.connected_components(G)
    largest_component = max(components, key=len)
    subgraph = G.subgraph(largest_component)
    # ``pos`` from figure 1 is reused: it was computed on the same G, so
    # recomputing the layout here would only repeat the work.
    betCent = nx.betweenness_centrality(subgraph, normalized=True,
                                        endpoints=True)
    node_color = [20000.0 * G.degree(v) for v in subgraph]
    node_size = [v * 10000 for v in betCent.values()]
    plt.figure(figsize=(20, 15))
    nx.draw_networkx(subgraph, pos=pos, with_labels=False,
                     node_color=node_color,
                     node_size=node_size)
    plt.axis('off')

    # ---- Figure 3: multigraph from melted_df_r ----------------------------
    G = nx.from_pandas_edgelist(melted_df_r,
                                'protein1',
                                'protein2',
                                edge_attr=['sources', 'color'],
                                create_using=nx.MultiGraph())
    G.add_nodes_from(nodes_for_adding=melted_df_r.protein1.tolist())

    labels = {node: node for node in G.nodes()}
    pos = nx.kamada_kawai_layout(G)
    fig, ax = plt.subplots(figsize=(20, 20))
    nx.draw_networkx_nodes(G, pos, ax=ax,
                           label=True,
                           node_size=100,
                           cmap=plt.cm.Blues)
    nx.draw_networkx_edges(G, pos, edge_color="r", ax=ax)
    _ = nx.draw_networkx_labels(G, pos, labels, ax=ax)

    # ---- Figure 4: MCL on a random geometric graph ------------------------
    fig, ax = plt.subplots(figsize=(20, 20))
    # number of nodes to use
    numnodes = 200
    # generate random positions as a dictionary where the key is the node id
    # and the value is a tuple containing 2D coordinates
    positions = {i: (random.random() * 2 - 1, random.random() * 2 - 1)
                 for i in range(numnodes)}
    # use networkx to generate the graph
    network = nx.random_geometric_graph(numnodes, 0.3, pos=positions)
    # then get the adjacency matrix (in sparse form)
    matrix = nx.to_scipy_sparse_matrix(network)
    # run the MCL algorithm on the adjacency matrix and retrieve the clusters
    result = mc.run_mcl(matrix)           # run MCL with default parameters
    clusters = mc.get_clusters(result)    # get clusters
    mc.draw_graph(matrix, clusters, pos=positions, node_size=50,
                  with_labels=False, edge_color="silver")
Ejemplo n.º 10
0
def markov_clustering(G):
    """Cluster graph ``G`` with MCL (default parameters), draw and return it.

    Returns:
        The clusters produced by ``mc.get_clusters``.
    """
    adjacency = nx.to_scipy_sparse_matrix(G)
    clusters = mc.get_clusters(mc.run_mcl(adjacency))
    mc.draw_graph(
        adjacency,
        clusters,
        node_size=50,
        with_labels=False,
        edge_color="silver",
    )
    return clusters
Ejemplo n.º 11
0
import cooler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import markov_clustering as mc
import networkx as nx

import sys  # needed for sys.argv below; missing from the imports above

# Open the cooler file given as the first command-line argument.
clr = cooler.Cooler(sys.argv[1])

chroms = clr.chroms()[:]

# inter[i, j] holds the mean raw (unbalanced) value between chromosomes
# i and j; the diagonal stays at zero.
inter = np.zeros((chroms.shape[0], chroms.shape[0]))

# Build the matrix selector once instead of once per chromosome pair.
raw_matrix = clr.matrix(balance=False, sparse=True)

for i1, c1 in enumerate(chroms['name']):
    for i2, c2 in enumerate(chroms['name']):
        if c1 != c2:
            inter[i1, i2] = raw_matrix.fetch(c1, c2).mean()

plt.imshow(inter)
plt.show()

# Cluster the chromosomes on their mean inter-chromosomal values and draw
# the result labelled with the chromosome names.
markov_inter = mc.run_mcl(inter)
clusters = mc.get_clusters(markov_inter)
mc.draw_graph(markov_inter,
              clusters,
              labels={num: name
                      for num, name in enumerate(chroms['name'])})
plt.show()
Ejemplo n.º 12
0
def draw_clustering(matrix, clusters):
    """Display the clusters of ``matrix`` with small nodes and white edges."""
    style = {"node_size": 4, "with_labels": False, "edge_color": "white"}
    mc.draw_graph(matrix, clusters, **style)
Ejemplo n.º 13
0
import markov_clustering
import networkx
import random


numnodes = 100
# Give every node a random 2D position with both coordinates in [-1, 1).
positions = {
    i: (random.random() * 2 - 1, random.random() * 2 - 1)
    for i in range(numnodes)
}
network = networkx.random_geometric_graph(numnodes, 0.3, pos=positions)
matrix = networkx.to_scipy_sparse_matrix(network)

# Cluster with inflation 2 and draw the clusters at the generated positions.
result = markov_clustering.run_mcl(matrix, inflation=2)
clusters = markov_clustering.get_clusters(result)
markov_clustering.draw_graph(
    matrix,
    clusters,
    pos=positions,
    node_size=50,
    with_labels=False,
    edge_color="k",
    cmap="magma",
)


# Sweep the inflation from 1.5 to 2.5 in steps of 0.1 and report the
# modularity of each clustering.
for tenths in range(15, 26):
    inflation = tenths / 10
    result = markov_clustering.run_mcl(matrix, inflation=inflation)
    clusters = markov_clustering.get_clusters(result)
    Q = markov_clustering.modularity(matrix=result, clusters=clusters)
    print("inflation:", inflation, "modularity:", Q)


# Separate example: agglomerative clustering of six 2D points.
from sklearn.cluster import AgglomerativeClustering
import numpy as np
X = np.array([
    [1, 2], [1, 4], [1, 0],
    [4, 2], [4, 4], [4, 0],
])
clustering = AgglomerativeClustering().fit(X)
clustering


Ejemplo n.º 14
0
# Report the loaded labels on a single line.
print("Loaded labels (" + str(len(Config.labels)) + " classes): ",
      Config.labels,
      sep='')

# In[93]:

# Zero every entry of the similarity matrix that exceeds the threshold,
# working on a flattened copy so ``sim`` itself is untouched.
threshold = 0.75
adjmat = sim.reshape((-1, )).copy()
above_threshold = adjmat > threshold
adjmat[above_threshold] = 0
#adjmat[adjmat > 0] = 1
# The count covers every zero entry, including any that were already zero.
num_zero = len(adjmat[adjmat == 0])
print("{} out of {} values set to zero".format(num_zero, len(adjmat)))
adjmat = adjmat.reshape(sim.shape)

# In[94]:

G = make_graph(adjmat, labels=Config.labels)
nx.draw_spring(G, with_labels=True)

# In[95]:

matrix = nx.to_scipy_sparse_matrix(G)
# Run MCL with the inflation set explicitly to 2, then extract the clusters.
result = mc.run_mcl(matrix, inflation=2)
clusters = mc.get_clusters(result)
print("There are {} clusters.".format(len(clusters)))
mc.draw_graph(matrix, clusters, with_labels=True, edge_color="silver")

# In[77]:

ref = np.genfromtxt(labelfilename, delimiter=',', dtype=None)
print(ref[19])
Ejemplo n.º 15
0
# 17 x 17 adjacency matrix with a self-loop of weight 1 on every node.
adjmat = np.zeros((17, 17))
for node in range(17):
    adjmat[node, node] = 1

# Each edge is (node, node, weight); store it in both directions so the
# matrix stays symmetric.
for edge in data:
    adjmat[edge[0], edge[1]] = adjmat[edge[1], edge[0]] = edge[2]

# Normalize the adjacency matrix: divide every column by its sum.
adj_sum = adjmat.sum(axis=0)
adjmat /= adj_sum

# Run MCL once per inflation value, keeping each result and its clusters.
infla = [1.1, 1.3, 1.5, 1.7, 2.1]

import markov_clustering as mcl

final_mat = []
final_cl = []
for i in infla:
    result = mcl.run_mcl(adjmat, inflation=i, iterations=10)
    cluster = mcl.get_clusters(result)
    final_mat.append(result)
    final_cl.append(cluster)

# Print the matrix of the last run, then draw the run selected by k.
print(result)
k = 4
mcl.draw_graph(final_mat[k], final_cl[k], with_labels=True, edge_color="black")