def mcl(graph, viz=False):
    """Cluster ``graph`` with MCL, keeping the inflation that scores best.

    Every inflation value produced by ``np.arange(1.2, 3, 0.1)`` is tried and
    the clustering with the highest ``mc.modularity`` is kept.

    Args:
        graph: networkx graph to cluster.
        viz: when falsy (default) the clusters are returned; otherwise they
            are drawn with ``mc.draw_graph`` and nothing is returned.

    Returns:
        When ``viz`` is falsy, a list of clusters, each a list of the nodes
        of ``graph``; ``None`` when the clusters are drawn instead.
    """
    mat = nx.to_numpy_matrix(graph)
    best_q = -1
    clusters = []
    for val in np.arange(1.2, 3, 0.1):
        res = mc.run_mcl(mat, inflation=val)
        candidate = mc.get_clusters(res)
        q = mc.modularity(matrix=np.asmatrix(res), clusters=candidate)
        if q > best_q:
            # Record the score as well as the clusters; without this every
            # later candidate scoring above -1 would replace the best one.
            best_q = q
            clusters = candidate

    if not viz:
        # get_clusters reports matrix indices; translate them back to nodes.
        labels = dict(zip(range(len(graph)), graph.nodes()))
        return [[labels.get(item) for item in cluster] for cluster in clusters]

    plt.figure(num=None, figsize=(20, 20), dpi=50)
    mc.draw_graph(mat, clusters, node_size=200, with_labels=False,
                  edge_color="silver")
def plot_clusters(filename, matrix, clusters, height=5, width=7, dpi=300):
    """Draw the clustered graph on a fresh figure and save it to ``filename``.

    Args:
        filename: destination passed to ``Figure.savefig``.
        matrix: adjacency matrix handed to ``mc.draw_graph``.
        clusters: clusters handed to ``mc.draw_graph``.
        height: figure height.
        width: figure width.
        dpi: resolution used when saving.
    """
    from matplotlib import pyplot as plt

    figure, axes = plt.subplots(figsize=(width, height))
    draw_options = {
        "node_size": 50,
        "with_labels": False,
        "edge_color": "silver",
        "ax": axes,
    }
    mc.draw_graph(matrix, clusters, **draw_options)
    figure.savefig(filename, dpi=dpi)
def export_node_list_withimage(encode_data, name_data, matrix_form=False,
                               thresh=5, inflation=1.5, header=True,
                               delim=',', label=None,
                               filename='node_list.csv'):
    """Cluster the data with MCL and write a node list with image names.

    Args:
        encode_data: either an adjacency matrix (``matrix_form=True``) or a
            sequence of positions used to build a random geometric graph.
        name_data: per-node strings from which the image name is derived.
        matrix_form: treat ``encode_data`` as a ready adjacency matrix.
        thresh: radius passed to ``nx.random_geometric_graph``.
        inflation: MCL inflation parameter.
        header: write the ``Id/Label/Cluster-ID/image`` header row.
        delim: field delimiter, used for the header and the rows.
        label: per-node labels; defaults to the node index as a string.
        filename: output path (previously ignored in favour of a hard-coded
            ``node_list.csv``, which is still the default).
    """
    print('building graph')
    if matrix_form:
        numnodes = encode_data.shape[0]
        matrix = encode_data
    else:
        numnodes = len(encode_data)
        positions = {i: encode_data[i] for i in range(numnodes)}
        # use networkx to generate the graph
        network = nx.random_geometric_graph(numnodes, thresh, pos=positions)
        # then get the adjacency matrix (in sparse form)
        matrix = nx.to_scipy_sparse_matrix(network)

    print('runing mcl')
    result = mc.run_mcl(matrix, inflation=inflation)
    clusters = mc.get_clusters(result)
    mc.draw_graph(matrix, clusters, node_size=50, with_labels=False,
                  edge_color="silver")

    if label is None:
        label = [str(i) for i in range(numnodes)]

    # The context manager closes the file even if a row fails to build.
    with open(filename, 'w') as out:
        if header:
            out.write(delim.join(["Id", "Label", "Cluster-ID", "image"]) + "\n")
        # Cluster ids in the output start at 1.
        for cluster_id, members in enumerate(clusters, start=1):
            for node in members:
                # Text before the first '!'; the whole label when there is
                # none (slicing with find()'s -1 used to drop the last char).
                short_label = label[node].partition('!')[0]
                # NOTE(review): presumably skips an 'RTW' prefix plus one
                # separator and a 4-character extension - confirm against
                # the format of name_data.
                start = name_data[node].find('RTW') + 4
                image = name_data[node][start:-4] + ".png"
                fields = [
                    "\"" + label[node] + "\"",
                    "\"" + short_label + "\"",
                    str(cluster_id),
                    "\"" + image + "\"",
                ]
                out.write(delim.join(fields) + "\n")
def graph_clustering(graph,cluster_rate = 1.5, draw = False):
    """Run MCL on ``graph`` and tag every node with its cluster number.

    The number of clusters is printed and stored in
    ``graph.graph['Total_AS']``; each node receives an ``AS_N`` attribute
    holding the zero-based index of its cluster.

    Args:
        graph: networkx graph, modified in place.
        cluster_rate: MCL inflation parameter.
        draw: when true, draw the clustering and call ``plt.show()``.
    """
    n_Matrix = nx.to_scipy_sparse_matrix(graph)
    result = mc.run_mcl(n_Matrix, inflation=cluster_rate)
    clusters = mc.get_clusters(result)
    print("Number of AS: " + str(len(clusters)))
    graph.graph['Total_AS'] = len(clusters)

    # Clusters hold matrix row indices, and rows follow graph.nodes() order.
    # Map them back so graphs whose nodes are not exactly 0..n-1 in insertion
    # order are tagged correctly instead of gaining spurious nodes.
    node_at = list(graph.nodes())
    for as_num, members in enumerate(clusters):
        for index in members:
            graph.add_node(node_at[index], AS_N=as_num)

    if draw:
        mc.draw_graph(n_Matrix, clusters, node_size=10, with_labels=False,
                      edge_color="black", width=0.2)
        plt.show()
def cluster_plot(adjacent, title, pos, inflation=1.3, filename=None,
                 labels=None, label_flag=True, node_size=150, figsize=(6, 6),
                 use_nodeaslabel=False, width=1):
    """Cluster ``adjacent`` with MCL and plot the result on a new figure.

    Args:
        adjacent: adjacency matrix.
        title: plot title.
        pos: node positions; a spring layout is computed when ``None``.
        inflation: MCL inflation parameter.
        filename: save the figure here; when ``None`` the figure is shown
            without blocking instead.
        labels: node -> label mapping; built automatically when ``None``.
        label_flag: draw the labels when true.
        node_size: node size passed to the drawing call.
        figsize: figure size.
        use_nodeaslabel: auto-built labels are the node itself when true,
            otherwise the index of the node's cluster.
        width: edge width passed to the drawing call.

    Returns:
        ``(clusters, pos, labels)`` as used for the plot.
    """
    plt.subplots(1, 1, sharey=False, sharex=False, figsize=figsize)
    result = mc.run_mcl(adjacent, inflation=inflation)
    clusters = mc.get_clusters(result)
    plt.title(title)
    graph = nx.Graph(adjacent)
    # NOTE(review): complete_cluster is defined elsewhere; presumably it adds
    # nodes missing from every cluster - confirm.
    clusters = complete_cluster(clusters, graph.nodes())

    # Resolved once here; the original re-checked later, which was unreachable.
    if pos is None:
        pos = nx.spring_layout(graph, iterations=100)
    mc.draw_graph(adjacent, clusters, pos=pos, with_labels=False,
                  edge_color="silver", node_size=node_size, width=width)

    if labels is None:
        if use_nodeaslabel:
            labels = {n: n for n in graph.nodes()}
        else:
            labels = {n: ci for ci, c in enumerate(clusters) for n in c}
    if label_flag:
        nx.draw_networkx_labels(graph, pos, labels=labels)

    if filename is not None:
        plt.savefig(filename, bbox_inches='tight')
    else:
        plt.show(block=False)
    return clusters, pos, labels
def draw_mcl_clustering(clusters, matrix, network):
    """Draw an MCL clustering using a graphviz layout of ``network``.

    ``mc.draw_graph`` is given positions keyed by node index, so the layout
    (keyed by node name) is re-keyed by each node's position in
    ``network.nodes()``.

    :param clusters: clusters to draw
    :param matrix: adjacency matrix to draw
    :param network: networkx graph supplying node names and the layout
    :return: None
    """
    pos = nx.nx_pydot.graphviz_layout(network)
    # One lookup table instead of list.index() per node (was O(n^2)).
    index_of = {name: idx for idx, name in enumerate(network.nodes())}
    pos_index = {index_of[name]: xy for name, xy in pos.items()}
    mc.draw_graph(matrix, clusters, pos=pos_index, node_size=50,
                  with_labels=False, edge_color="silver")
import numpy as np
import markov_clustering as mc
import networkx as nx
import pandas as pd

# Read the edge table; the context manager closes the workbook handle, which
# the original left open.
with open('D:/LEARN/GRADUATE2020/IDA/assignment3/mcl.xlsx', 'rb') as workbook:
    file = pd.read_excel(workbook, sheet_name='Sheet3')
matrix = np.array(file)

# Build a weighted graph: each row is read as (node, node, weight).
g = nx.Graph()
innum = 2.1  # change inflation values
for i in matrix:
    g.add_edge(i[0], i[1], weight=i[2])
a = nx.to_numpy_array(g)

# Run MCL once on the adjacency matrix, then feed the result back in nine
# more times with the same inflation.
result = mc.run_mcl(a, inflation=innum)
for j in range(9):
    result = mc.run_mcl(result, inflation=innum)
clusters = mc.get_clusters(result)

mc.draw_graph(result, clusters, with_labels=True)
import markov_clustering as mc
import pandas as pd
import networkx as nx
import random
import matplotlib.pyplot as plt

# Number of nodes to use (not referenced again within this block).
numnodes = 45

# Load the comma-separated weighted edge list.
with open('data-weight.csv', 'rb') as f:
    G = nx.read_weighted_edgelist(f, delimiter=',')

# Sparse adjacency matrix carrying the 'weight' edge attribute.
matrix = nx.to_scipy_sparse_matrix(G, weight='weight')

# Cluster with inflation 2.5 and extract the clusters.
result = mc.run_mcl(matrix, inflation=2.5)
clusters = mc.get_clusters(result)

draw_options = {
    "node_size": 50,
    "with_labels": False,
    "edge_color": "silver",
}
mc.draw_graph(matrix, clusters, **draw_options)
plt.title("Weighted Markov Clustering Algorithm")
plt.show()
def plot_EIF4F_coexp_network (x): EIF4F_InterPro_Net, melted_df_r, melted_df_b = EIF4F_inter_coexp (x) ## plot nodes interacting eIF4G1 by experiments and database ## plot edges for experimental interactions G = nx.from_pandas_edgelist(EIF4F_InterPro_Net, 'protein1', 'protein2', edge_attr = ['experimental','database']) G.add_nodes_from(nodes_for_adding = EIF4F_InterPro_Net.protein1.tolist()) protein = list(EIF4F_InterPro_Net.protein1.unique()) labels = [i for i in dict(G.nodes).keys()] labels = {i:i for i in dict(G.nodes).keys()} fig, ax = plt.subplots(figsize = (20,20)) pos = nx.kamada_kawai_layout(G) #pos = nx.spring_layout(G, seed = 50) # Draw every protein nx.draw_networkx_nodes(G, pos, ax = ax, label =True, node_color='#cccccc', node_size=100) #nx.draw_networkx_nodes(G, pos, # nodelist = ["EIF4A1","EIF4G1"], # node_color='orange', # node_size=100) # Draw POPULAR protein popular_protein = [item for item in protein if G.degree(item) > 20] nx.draw_networkx_nodes(G, pos, nodelist = popular_protein, node_color = 'orange', node_size = 100) nx.draw_networkx_edges(G, pos, ax=ax, width=1, edge_color="#cccccc") _ = nx.draw_networkx_labels(G, pos, labels, ax=ax) #Compute largest connected component of the network (LC) #lsit the components in network (g) components = nx.connected_components(G) #compare among components and find the one having maximun length(LC) largest_component = max(components, key=len) #largest_component # Q1.draw LC subgraph = G.subgraph(largest_component) #pos = nx.spring_layout(subgraph) # force nodes to separte #pos= graphviz_layout(G, prog = "neato") pos = nx.kamada_kawai_layout(G) betCent = nx.betweenness_centrality(subgraph, normalized=True, endpoints=True) node_color = [20000.0 * G.degree(v) for v in subgraph] node_size = [v * 10000 for v in betCent.values()] plt.figure(figsize=(20,15)) nx.draw_networkx(subgraph, pos = pos, with_labels = False, node_color = node_color, node_size = node_size) plt.axis('off') ## plot nodes interacting eIF4G1 by 
experiment and database ## plot edges of experimental interaction. G = nx.from_pandas_edgelist(melted_df_r, 'protein1', 'protein2', edge_attr = ['sources','color'], create_using = nx.MultiGraph()) G.add_nodes_from(nodes_for_adding = melted_df_r.protein1.tolist()) #weights = nx.get_edge_attributes(G,'weight').values() labels = [i for i in dict(G.nodes).keys()] labels = {i:i for i in dict(G.nodes).keys()} pos = nx.kamada_kawai_layout(G) #pos = nx.spring_layout(G, seed = 50) #pos= graphviz_layout(G, prog = "neato") fig, ax = plt.subplots(figsize = (20,20)) nx.draw_networkx_nodes(G, pos, ax = ax, label =True, node_size = 100, cmap=plt.cm.Blues) nx.draw_networkx_edges(G, pos, edge_color = "r", ax=ax) _ = nx.draw_networkx_labels(G, pos, labels, ax=ax) fig, ax = plt.subplots(figsize = (20,20)) # number of nodes to use numnodes = 200 # generate random positions as a dictionary where the key is the node id and the value # is a tuple containing 2D coordinates positions = {i:(random.random() * 2 - 1, random.random() * 2 - 1) for i in range(numnodes)} # use networkx to generate the graph network = nx.random_geometric_graph(numnodes, 0.3, pos=positions) # then get the adjacency matrix (in sparse form) matrix = nx.to_scipy_sparse_matrix(network) # run the MCL algorithm on the adjacency matrix and retrieve the clusters result = mc.run_mcl(matrix) # run MCL with default parameters clusters = mc.get_clusters(result) # get clusters mc.draw_graph(matrix, clusters, pos=positions, node_size=50, with_labels=False, edge_color="silver")
def markov_clustering(G):
    """Cluster ``G`` with MCL (default parameters), draw and return the clusters.

    Args:
        G: networkx graph.

    Returns:
        The clusters reported by ``mc.get_clusters``.
    """
    adjacency = nx.to_scipy_sparse_matrix(G)
    clusters = mc.get_clusters(mc.run_mcl(adjacency))
    draw_options = {
        "node_size": 50,
        "with_labels": False,
        "edge_color": "silver",
    }
    mc.draw_graph(adjacency, clusters, **draw_options)
    return clusters
import sys

import cooler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import markov_clustering as mc
import networkx as nx

# `sys` was used without being imported, which raised NameError on this line.
clr = cooler.Cooler(sys.argv[1])
chroms = clr.chroms()[:]

# Mean raw (unbalanced) contact value for every ordered pair of distinct
# chromosomes; the diagonal stays at zero.
inter = np.zeros((chroms.shape[0], chroms.shape[0]))
contacts = clr.matrix(balance=False, sparse=True)  # loop-invariant selector
for i1, c1 in enumerate(chroms['name']):
    for i2, c2 in enumerate(chroms['name']):
        if c1 != c2:
            inter[i1, i2] = contacts.fetch(c1, c2).mean()

plt.imshow(inter)
plt.show()

# Cluster the chromosomes with MCL and draw them labelled by name.
markov_inter = mc.run_mcl(inter)
clusters = mc.get_clusters(markov_inter)
mc.draw_graph(markov_inter, clusters,
              labels={num: name for num, name in enumerate(chroms['name'])})
plt.show()
def draw_clustering(matrix, clusters):
    """Display ``clusters`` of ``matrix`` with small nodes and white edges."""
    draw_options = {
        "node_size": 4,
        "with_labels": False,
        "edge_color": "white",
    }
    mc.draw_graph(matrix, clusters, **draw_options)
import markov_clustering
import networkx
import random

# Random 2D positions in [-1, 1) x [-1, 1), one per node id.
numnodes = 100
positions = {
    node: (random.random() * 2 - 1, random.random() * 2 - 1)
    for node in range(numnodes)
}

# Random geometric graph on those positions and its sparse adjacency matrix.
network = networkx.random_geometric_graph(numnodes, 0.3, pos=positions)
matrix = networkx.to_scipy_sparse_matrix(network)

# Cluster with inflation 2 and draw the result.
result = markov_clustering.run_mcl(matrix, inflation=2)
clusters = markov_clustering.get_clusters(result)
draw_options = {
    "pos": positions,
    "node_size": 50,
    "with_labels": False,
    "edge_color": "k",
    "cmap": "magma",
}
markov_clustering.draw_graph(matrix, clusters, **draw_options)

# Report the modularity for inflation 1.5, 1.6, ..., 2.5.
for tenths in range(15, 26):
    inflation = tenths / 10
    result = markov_clustering.run_mcl(matrix, inflation=inflation)
    clusters = markov_clustering.get_clusters(result)
    Q = markov_clustering.modularity(matrix=result, clusters=clusters)
    print("inflation:", inflation, "modularity:", Q)

from sklearn.cluster import AgglomerativeClustering
import numpy as np

# Fit agglomerative clustering (default settings) on six 2D points.
X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
clustering = AgglomerativeClustering().fit(X)
clustering
label_count = len(Config.labels)
print("Loaded labels (" + str(label_count) + " classes): ", end='')
print(Config.labels)

# In[93]:

# Zero every similarity above the threshold, working on a flat copy of `sim`.
threshold = 0.75
adjmat = sim.reshape(-1).copy()
adjmat[adjmat > threshold] = 0
#adjmat[adjmat > 0] = 1
zero_mask = adjmat == 0
print("{} out of {} values set to zero".format(len(adjmat[zero_mask]), len(adjmat)))
adjmat = adjmat.reshape(sim.shape)

# In[94]:

# make_graph is defined elsewhere in the notebook.
G = make_graph(adjmat, labels=Config.labels)
nx.draw_spring(G, with_labels=True)

# In[95]:

matrix = nx.to_scipy_sparse_matrix(G)
result = mc.run_mcl(matrix, inflation=2)  # run MCL with inflation 2
clusters = mc.get_clusters(result)  # get clusters
print("There are {} clusters.".format(len(clusters)))
mc.draw_graph(matrix, clusters, with_labels=True, edge_color="silver")

# In[77]:

ref = np.genfromtxt(labelfilename, delimiter=',', dtype=None)
print(ref[19])
# 17x17 adjacency matrix with self-loops on the diagonal.
adjmat = np.eye(17)

# Fill both triangles so the matrix is symmetric; each edge is read as
# (row, column, weight).
for edge in data:
    adjmat[edge[0], edge[1]] = edge[2]
    adjmat[edge[1], edge[0]] = edge[2]

# Normalize the adjacency matrix: divide every column by its sum.
adj_sum = np.sum(adjmat, axis=0)
adjmat = adjmat / adj_sum

# MCL for each inflation value, keeping every result and its clusters.
infla = [1.1, 1.3, 1.5, 1.7, 2.1]
import markov_clustering as mcl
final_mat = []
final_cl = []
for rate in infla:
    result = mcl.run_mcl(adjmat, inflation=rate, iterations=10)
    cluster = mcl.get_clusters(result)
    final_mat.append(result)
    final_cl.append(cluster)
    # NOTE(review): the original indentation was lost; printing is assumed to
    # happen once per inflation value - confirm.
    print(result)

# Draw the run at index k of `infla`.
k = 4
mcl.draw_graph(final_mat[k], final_cl[k], with_labels=True, edge_color="black")