def markov_clustering(distance_mat, inflation):
    """
    Runs the Markov Clustering (MCL) algorithm on the input distance matrix.

    Inputs:
        DISTANCE_MAT: A (neurons x neurons) numpy matrix calculated by some
                      distance metric. It is turned into a graph and then
                      into a scipy sparse matrix before being clustered.
        INFLATION: A number; the Hadamard (element-wise) power to take during
                   the inflation step. In general, values from 1.1 to 10.0
                   can be tried, with higher values generally resulting in
                   more clusters. Inflation boosts the probabilities of
                   intra-cluster walks and demotes inter-cluster walks.

    Outputs:
        CLUSTERS: The clustering extracted from the final MCL matrix, exactly
                  as returned by mc.get_clusters (presumably a list of tuples
                  of node indices -- confirm against the mc package).
        Q: The modularity score associated with this clustering, as computed
           by mc.modularity. Modularity compares the fraction of edges that
           fall within the clusters with the fraction expected if edges were
           randomly distributed.
    """
    # NOTE(review): from_numpy_matrix / to_scipy_sparse_matrix only exist in
    # NetworkX releases before 3.0 -- confirm the pinned version.
    graph = nx.from_numpy_matrix(distance_mat)
    mcl_result = mc.run_mcl(nx.to_scipy_sparse_matrix(graph), inflation=inflation)
    clusters = mc.get_clusters(mcl_result)
    # Modularity is evaluated on the converged MCL matrix, not on the input.
    return clusters, mc.modularity(matrix=mcl_result, clusters=clusters)
def mcl(graph, viz=False):
    """
    Cluster `graph` with MCL and keep the clustering of highest modularity.

    MCL is run once for every inflation value in np.arange(1.2, 3, 0.1); the
    modularity of each resulting clustering is computed and the clustering
    with the largest modularity is kept.

    :param graph: NetworkX graph to cluster.
    :param viz: when falsy, return the clusters; otherwise draw them with
        mc.draw_graph on a new matplotlib figure.
    :return: when `viz` is falsy, a list of clusters, each a list of node
        labels taken from graph.nodes(); otherwise None.
    """
    mat = nx.to_numpy_matrix(graph)

    best_q = -1
    clusters = None
    for val in np.arange(1.2, 3, 0.1):
        res = mc.run_mcl(mat, inflation=val)
        clust = mc.get_clusters(res)
        q = mc.modularity(matrix=np.asmatrix(res), clusters=clust)
        if q > best_q:
            # Remember the score as well as the clustering; without updating
            # the score every later run would win and the last one be kept.
            best_q = q
            clusters = clust
        elif clusters is None:
            # Fallback so `clusters` is always bound, even if the first
            # score does not exceed the initial threshold (e.g. NaN).
            clusters = clust

    if not viz:
        # MCL works on matrix row indices; map them back to node labels.
        labels = dict(zip(range(len(graph)), graph.nodes()))
        return [[labels.get(item) for item in cluster] for cluster in clusters]

    plt.figure(num=None, figsize=(20, 20), dpi=50)
    mc.draw_graph(mat, clusters, node_size=200, with_labels=False, edge_color="silver")
def test_modularity():
    """Check mc.modularity against the expected value stored in fixture 4.

    Entry 4 of test_matrices supplies the input matrix (index 0) and the
    expected modularity (index 1). The matrix is clustered with MCL using
    default parameters and the modularity of that clustering is compared to
    the expected value with np.isclose.
    """
    fixture = test_matrices[4]
    adjacency = np.matrix(fixture[0])
    expected = fixture[1]

    found = mc.get_clusters(mc.run_mcl(adjacency))
    # Modularity is computed on the input matrix, not on the MCL result.
    assert np.isclose(mc.modularity(adjacency, found), expected)
def mcl_parameter_QC(network, range_from=15, range_to=26):
    """Print the modularity of MCL clusterings over a sweep of inflation values.

    The inflation values are i / 10 for every i in range(range_from,
    range_to); with the defaults that is 1.5, 1.6, ..., 2.5. For each value
    the network is clustered with MCL and one line with the inflation and
    the modularity of the clustering is printed. Nothing is returned.

    :param network: NetworkX graph to cluster.
    :param range_from: first inflation value, in tenths (inclusive).
    :param range_to: last inflation value, in tenths (exclusive).
    """
    adjacency = nx.to_scipy_sparse_matrix(network)

    for tenths in range(range_from, range_to):
        inflation = tenths / 10
        mcl_result = mc.run_mcl(adjacency, inflation=inflation)
        # Modularity is evaluated on the converged MCL matrix.
        Q = mc.modularity(matrix=mcl_result, clusters=mc.get_clusters(mcl_result))
        print("inflation:", inflation, "modularity:", Q)
def modularity(matrix, clusters):
    """Return the modularity of `clusters` on `matrix` via mc.modularity.

    :param matrix: matrix passed through unchanged to mc.modularity.
    :param clusters: either a clustering already in the format mc.modularity
        accepts, or a dict (including dict subclasses such as defaultdict or
        OrderedDict) whose values are iterables of objects exposing an `idx`
        attribute. A dict is converted to a list with one tuple of `idx`
        values per cluster; the dict keys are ignored.
    :return: the value returned by mc.modularity.
    """
    # isinstance (rather than an exact type comparison) so that dict
    # subclasses are converted too instead of being passed on as-is.
    if isinstance(clusters, dict):
        clusters = [
            tuple(member.idx for member in group)
            for group in clusters.values()
        ]
    return mc.modularity(matrix=matrix, clusters=clusters)
def optimise_inflation(matrix, start=1.1, end=2.5, step=0.1):
    """Find the MCL inflation value that gives the highest modularity.

    Every inflation value in np.arange(start, end, step) is tried: the
    matrix is clustered with MCL and the modularity of the clustering is
    recorded.

    :param matrix: matrix handed to markov_clustering.run_mcl.
    :param start: first inflation value to try.
    :param end: upper bound of the sweep, as interpreted by np.arange.
    :param step: spacing between consecutive inflation values.
    :return: tuple (best inflation, its modularity); the modularity is a
        one-element numpy array because scores are stored in an (n, 1) array.
    """
    inflations = np.arange(start, end, step).tolist()
    scores = np.zeros(shape=(len(inflations), 1))

    for row, inflation in enumerate(inflations):
        mcl_result = markov_clustering.run_mcl(matrix, inflation=inflation)
        found = markov_clustering.get_clusters(mcl_result)
        # Modularity is evaluated on the converged MCL matrix.
        scores[row] = markov_clustering.modularity(matrix=mcl_result, clusters=found)

    # Index of the first maximum; scores has a single column, so the flat
    # index returned by argmax is also the row index.
    best = np.argmax(scores)
    return inflations[best], scores[best]
def opt_cluster_graph(graph):
    """
    Finds the best possible clustering of <graph> (optimized over modularity)
    by trying every inflation value in INFLATION_VALS.

    :param graph: (type=NetworkX graph)
    :return: (type=tuple) four items:
        the sparse adjacency matrix of the graph,
        the clusters of the best clustering (None if none was selected),
        the modularity of that clustering (-2 if none was selected),
        the inflation value that produced it (0 if none was selected)
    """
    matrix = nx.to_scipy_sparse_matrix(graph)  # adjacency matrix in sparse form
    best_clusters, best_quality, best_infl = None, -2, 0

    for infl in INFLATION_VALS:
        # Only the clusters (second item) of cluster_graph's result are used.
        _, clusters, _ = cluster_graph(graph, infl)
        quality = mc.modularity(matrix, clusters)  # measure of quality of clustering
        if DEBUG:
            # NOTE(review): `expn` is not assigned in this function; it must
            # exist at module level or this line raises NameError -- confirm.
            print("inflation: " + str(infl) + ", expansion: " + str(expn) + ", modularity: " + str(quality))
        # Strict comparison: on ties the earliest inflation value is kept.
        if quality > best_quality:
            best_clusters, best_quality, best_infl = clusters, quality, infl

    return matrix, best_clusters, best_quality, best_infl
# Script fragment: add a weighted edge to G for every positive entry of
# new_data, then explore MCL clusterings of the resulting graph.
# NOTE(review): G, new_data and indices are defined outside this excerpt;
# the nesting below is reconstructed from a whitespace-collapsed source --
# confirm against the original file.
edge_labels = {}
count = 0  # number of positive entries encountered
for i, origin_state in enumerate(new_data):
    for j, destination_state in enumerate(origin_state):
        rate = new_data[i][j]
        if rate > 0:
            count = count + 1
            try:
                G.add_edge(indices[i], indices[j], weight=rate)
            except:
                # NOTE(review): bare except that drops into the debugger on
                # any failure -- looks like a debugging leftover.
                pdb.set_trace()
                print('ol')

matrix = nx.to_scipy_sparse_matrix(G)
# Sweep inflation over 1.5, 1.6, ..., 2.5 and print the modularity of each
# clustering (computed on the converged MCL matrix).
for inflation in [i / 10 for i in range(15, 26)]:
    result = mc.run_mcl(matrix, inflation=inflation)
    clusters = mc.get_clusters(result)
    Q = mc.modularity(matrix=result, clusters=clusters)
    print("inflation:", inflation, "modularity:", Q)
#communities_generator = community.girvan_newman(G)
#top_level_communities = next(communities_generator)
result = mc.run_mcl(matrix)  # run MCL with default parameters
clusters = mc.get_clusters(result)
# NOTE(review): unconditional breakpoint -- execution stops here every run.
pdb.set_trace()
GG = pyintergraph.nx2igraph(G, labelname="node_label")
# NOTE(review): this rebinds `clusters`, discarding the MCL clusters above in
# favour of the result of nx.clustering -- confirm this is intended.
clusters = nx.clustering(G, weight='weight')
# Demo script: cluster a random geometric graph with MCL, draw the result,
# and print the modularity for a sweep of inflation values.
numnodes = 100
# One random (x, y) position per node, each coordinate drawn from [-1, 1).
positions = {i:(random.random() * 2 - 1, random.random() * 2 - 1) for i in range(numnodes)}
network = networkx.random_geometric_graph(numnodes, 0.3, pos=positions)
matrix = networkx.to_scipy_sparse_matrix(network)
result = markov_clustering.run_mcl(matrix, inflation=2)
clusters = markov_clustering.get_clusters(result)  # get clusters
# Draw the graph at the generated positions, passing the clusters to draw_graph.
markov_clustering.draw_graph(matrix, clusters, pos=positions, node_size=50, with_labels=False, edge_color="k", cmap="magma")
# Sweep inflation over 1.5, 1.6, ..., 2.5; modularity is computed on the
# converged MCL matrix. This rebinds `result` and `clusters` on every pass.
for inflation in [i / 10 for i in range(15, 26)]:
    result = markov_clustering.run_mcl(matrix, inflation=inflation)
    clusters = markov_clustering.get_clusters(result)
    Q = markov_clustering.modularity(matrix=result, clusters=clusters)
    print("inflation:", inflation, "modularity:", Q)
# Separate example: agglomerative clustering of six 2-D points with
# scikit-learn's default settings.
from sklearn.cluster import AgglomerativeClustering
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
clustering = AgglomerativeClustering().fit(X)
# The two bare expressions below only display a value in an interactive
# session or notebook; they have no effect when run as a script.
clustering
clustering.labels_