def python_leiden(df, partition_type, initial_membership=None, weights=None,
                  n_iterations=2, seed=None, resolution_parameter=1):
    # create networkx object
    Gx = nx.from_pandas_edgelist(df=df, source='from', target='to', edge_attr='weight')
    # get weight attribute
    myweights = nx.get_edge_attributes(Gx, 'weight')
    # convert to igraph
    G = ig.Graph.TupleList(Gx.edges(), directed=False)
    G.es['weight'] = list(myweights.values())
    if partition_type == 'RBConfigurationVertexPartition':
        partition = la.find_partition(
            G,
            partition_type=la.RBConfigurationVertexPartition,
            initial_membership=initial_membership,
            weights=weights,
            n_iterations=n_iterations,
            seed=seed,
            resolution_parameter=resolution_parameter)
    elif partition_type == 'ModularityVertexPartition':
        partition = la.find_partition(
            G,
            partition_type=la.ModularityVertexPartition,
            initial_membership=initial_membership,
            weights=weights,
            n_iterations=n_iterations,
            seed=seed)
    else:
        raise ValueError('no other configurations have been tested')
    # create dataframe with results
    vname = partition.graph.vs['name']
    membership = partition.membership
    membership_plus1 = [x + 1 for x in membership]
    datadict = {'V': vname, 'mem': membership_plus1}
    leiden_dfr = pd.DataFrame(datadict)
    leiden_dfr = leiden_dfr.set_index('V')
    return leiden_dfr
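# --- usage sketch (illustration only, not from the original source) ---
# python_leiden() relies on the module-level aliases it uses above being imported:
# networkx as nx, igraph as ig, leidenalg as la, pandas as pd. The toy edge list
# below is made up; the membership labels in the result are 1-based.
import pandas as pd

toy_edges = pd.DataFrame({'from':   ['a', 'a', 'b', 'c'],
                          'to':     ['b', 'c', 'c', 'd'],
                          'weight': [1.0, 0.5, 2.0, 1.5]})
toy_clusters = python_leiden(toy_edges, 'ModularityVertexPartition', seed=42)
print(toy_clusters)  # DataFrame indexed by vertex name with a 'mem' column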
def leiden(self):
    ######### Use Leiden Algorithm to do clustering ########
    map_del = self.seq_map.tocoo()
    vcount = map_del.shape[0]
    sources = map_del.row
    targets = map_del.col
    wei = map_del.data
    index = sources > targets
    sources = sources[index]
    targets = targets[index]
    wei = wei[index]
    edgelist = list(zip(sources, targets))
    g = ig.Graph(vcount, edgelist)

    ############# determine the best resolution parameter ###########
    st = []
    res_option = np.arange(0, 300, 10)
    res_option[0] = 1
    for res in res_option:
        part = leidenalg.find_partition(g, leidenalg.RBConfigurationVertexPartition,
                                        weights=wei,
                                        resolution_parameter=res,
                                        n_iterations=-1)
        part = list(part)
        label_true = []
        label_pred = []
        for i in range(len(part)):
            for j in self.tax[part[i]]:
                if j != 'Unassign':
                    label_true.append(j)
                    label_pred.append(i)
        ARI_score = metrics.adjusted_rand_score(label_true, label_pred)
        NMI_score = metrics.normalized_mutual_info_score(label_true, label_pred)
        st.append((ARI_score + NMI_score) / 2)

    ind = st.index(max(st))
    res_optimal = res_option[ind]
    part = leidenalg.find_partition(g, leidenalg.RBConfigurationVertexPartition,
                                    weights=wei,
                                    resolution_parameter=res_optimal,
                                    n_iterations=-1)
    part = list(part)

    # dict of communities
    numnode = 0
    rang = []
    for ci in range(len(part)):
        if np.sum(self.len[part[ci]]) >= self.binsize:
            rang.append(ci)
            numnode = numnode + len(part[ci])
            for id in part[ci]:
                self.dist_cluster[self.name[id]] = 'group' + str(ci)

    logger.debug('The optimal resolution is {}'.format(res_optimal))
    logger.debug('There are {} contigs in {} bins'.format(numnode, len(rang)))

    del map_del, sources, targets, wei, index, edgelist, g, part, label_true, label_pred
def leiden_cluster(wadm, resolution=0.1):
    # convert the adjacency matrix into an igraph object
    g = get_igraph_from_adjacency(wadm, directed=True)
    # set the partition parameters
    partition_type = leidenalg.RBConfigurationVertexPartition
    partition_kwargs = {}
    partition_kwargs['weights'] = np.array(g.es['weight']).astype(np.float64)
    partition_kwargs['n_iterations'] = -1
    partition_kwargs['seed'] = 0
    partition_kwargs['resolution_parameter'] = resolution
    par = leidenalg.find_partition(g, partition_type, **partition_kwargs)
    # convert the partition into per-node labels
    # count the number of nodes
    n = 0
    for i in range(len(par)):
        n += len(par[i])
    # initialize the labels
    labels = [-1 for i in range(n)]
    for i in range(len(par)):
        for j in range(len(par[i])):
            labels[par[i][j]] = i
    return labels
def leiden_partition(
        vertices: List[int],
        edges: List[Tuple[int, int]]) -> Tuple[Dict[int, int], List[int]]:
    """
    :param vertices: The vertices to be divided into communities by the Leiden algorithm
    :type vertices: List[int]
    :param edges: The links according to which the vertices are divided into communities
    :type edges: List[Tuple[int, int]]
    :return: the partition of the vertices, isolated nodes
    :rtype: Tuple[Dict[int, int], List[int]]
    """
    graph = ig.Graph()
    graph.add_vertices(len(vertices))
    graph.vs['id'] = vertices
    graph.add_edges(edges)
    isolates = [(v.index, v['id']) for v in graph.vs.select(_degree=0)]
    graph.delete_vertices([v[0] for v in isolates])
    isolates = [v[1] for v in isolates]
    if len(graph.vs) == 0:
        return {}, isolates
    partition = leidenalg.find_partition(graph, leidenalg.ModularityVertexPartition)
    partition_map = {}
    for index, p in enumerate(partition.membership):
        partition_map[graph.vs[index]['id']] = p
    return partition_map, isolates
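# --- usage sketch (illustration only, not from the original source) ---
# leiden_partition() takes vertex ids plus edges given as index pairs into the
# vertices list; it assumes igraph (ig) and leidenalg are imported at module level.
# Vertex id 13 (index 3) is deliberately left isolated so it shows up in the second
# return value.
node_ids = [10, 11, 12, 13]
links = [(0, 1), (1, 2), (0, 2)]
membership, isolated = leiden_partition(node_ids, links)
print(membership)  # e.g. {10: 0, 11: 0, 12: 0}
print(isolated)    # [13]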
def leiden(self): """ Cluster the SNN graph using the Leiden algorithm. https://github.com/vtraag/leidenalg From Louvain to Leiden: guaranteeing well-connected communities Traag V, Waltman L, van Eck NJ https://arxiv.org/abs/1810.08473 """ log_debug('Running leiden clustering...') res = self.params['leiden_res'] seed = self.params['seed'] # construct the graph object nn = set(self.snn_graph[self.snn_graph.columns[0]]) g = ig.Graph() g.add_vertices(len(nn)) g.vs['name'] = list(range(1, len(nn) + 1)) ll = [] for i in self.snn_graph.itertuples(index=False): ll.append(tuple(i)) g.add_edges(ll) if self.params == 'ModularityVertexPartition': part = leidenalg.ModularityVertexPartition else: part = leidenalg.RBERVertexPartition cl = leidenalg.find_partition(g, part, n_iterations=10, resolution_parameter=res, seed=seed) self.leiden_cl = cl.membership self.leiden_prep() log_debug('Leiden has finished.')
def detect_community(matrix, weight="weight", method="leidenalg"):
    # Detect communities in the graph built from a (weighted) adjacency matrix.
    g = nx.from_numpy_matrix(matrix)
    dict_communities = {}
    #if method == "percolation":
        #communities = list(k_clique_communities(g, 3))
    #elif method == "Clauset-Newman-Moore":
        #communities = list(greedy_modularity_communities(g))
    #elif method == "infomap":
        #communities = findCommunities(g)
    #elif method == "label_propagation":
        #communities = label_propagation_communities(g)
    if method == "leidenalg":
        import igraph as ig
        import leidenalg
        conn_indices = np.where(matrix)
        # get the weights corresponding to these indices
        # (note: collected here but not passed to find_partition)
        weights = matrix[conn_indices]
        # a sequence of (i, j) tuples, each corresponding to an edge from i -> j
        edges = list(zip(*conn_indices))
        g = ig.Graph(edges)
        communities = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
        #print(communities.membership)
        #print(len(communities.membership))
        #print("network adj shape", matrix.shape)
        for i in range(0, len(communities.membership)):
            dict_communities[i] = communities.membership[i]
    return dict_communities
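# --- usage sketch (illustration only, not from the original source) ---
# detect_community() builds its own igraph/leidenalg objects internally, but it
# assumes numpy (np) and networkx (nx) are imported at module level. The tiny
# symmetric adjacency matrix below is made up.
import numpy as np

adj = np.array([[0, 1, 1, 0],
                [1, 0, 1, 0],
                [1, 1, 0, 1],
                [0, 0, 1, 0]], dtype=float)
node_to_community = detect_community(adj)
print(node_to_community)  # {node index: community id}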
def make_leiden_clusters(self, resolution: float = 1.0, random_seed=4466) -> None:
    """
    Leiden clustering

    :param resolution: Resolution parameter for the Leiden algorithm
    :param random_seed: Random seed for the partitioning
    :return: None
    """
    try:
        import leidenalg
    except ImportError:
        raise ImportError(
            "ERROR: 'leidenalg' package is not installed. Please find the installation instructions "
            "here: https://github.com/vtraag/leidenalg#installation.")
    import igraph  # python-igraph

    adj = nx.to_scipy_sparse_matrix(self.refG)
    sources, targets = adj.nonzero()
    g = igraph.Graph()
    g.add_vertices(adj.shape[0])
    g.add_edges(list(zip(sources, targets)))
    g.es['weight'] = adj[sources, targets].A1
    part = leidenalg.find_partition(
        g, leidenalg.RBConfigurationVertexPartition,
        resolution_parameter=resolution, seed=random_seed)
    clusts = np.array(part.membership) + 1
    for n, c in zip(self.refG.nodes, clusts):
        self.nodes[n]['cluster'] = str(c)
    return None
def leidenalg_igraph(g, res, random_state=0):
    '''Leidenalg clustering on an igraph object.

    Parameters
    ----------
    g : igraph
        The igraph object of connectivities.
    res : float
        The resolution parameter for Leidenalg clustering.
    random_state : int, optional
        The random state.

    Returns
    ----------
    labels : np.array
        \([N, ]\) The clustered labels.
    '''
    partition_kwargs = {}
    partition_type = leidenalg.RBConfigurationVertexPartition
    partition_kwargs["resolution_parameter"] = res
    partition_kwargs["seed"] = random_state
    part = leidenalg.find_partition(
        g, partition_type,
        **partition_kwargs,
    )
    labels = np.array(part.membership)
    return labels
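# --- usage sketch (illustration only, not from the original source) ---
# leidenalg_igraph() accepts any igraph.Graph; Zachary's karate club, bundled with
# python-igraph, works as a quick smoke test. numpy (np) and leidenalg are assumed
# to be imported at module level as in the function above.
import igraph as ig

karate = ig.Graph.Famous('Zachary')
karate_labels = leidenalg_igraph(karate, res=1.0)
print(karate_labels.shape)  # one integer cluster label per vertex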
def ld_time(G):
    # initiate an igraph object
    g = ig.Graph()
    # add vertices
    g.add_vertices(G.nodes)
    # add edges
    g.add_edges(G.edges)
    # add weights
    if nx.is_weighted(G):
        g.es['weight'] = list(nx.get_edge_attributes(G, 'weight').values())
    # initiate a list to store execution time for each algo
    algo_time = []
    for i in tqdm(range(10)):
        # start
        start_time = time.time()
        # fit the model
        partition = leidenalg.find_partition(
            g, leidenalg.ModularityVertexPartition)
        algo_time.append(time.time() - start_time)
    return np.mean(algo_time)
def run_leiden(g, **params):
    """
    runs the leiden partitioning algorithm on a given graph.
    """
    import leidenalg
    part = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
    return part.membership
def leiden(conn, resolution=0.05, random_state=0, n_iterations=-1):
    try:
        import leidenalg as la
    except ImportError:
        raise ImportError(
            'Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`.'
        )
    print('running Leiden clustering')
    partition_kwargs = {}
    # convert adjacency matrix into igraph
    g = get_igraph_from_adjacency(conn)
    # parameter setting
    partition_type = la.RBConfigurationVertexPartition
    partition_kwargs['weights'] = np.array(g.es['weight']).astype(np.float64)
    partition_kwargs['n_iterations'] = n_iterations
    partition_kwargs['seed'] = random_state
    partition_kwargs['resolution_parameter'] = resolution
    # Leiden algorithm
    # part = la.find_partition(g, la.CPMVertexPartition, **partition_kwargs)
    part = la.find_partition(g, partition_type, **partition_kwargs)
    # groups is a length-|V| array; the integer at each position is the cluster
    # that node belongs to
    groups = np.array(part.membership)
    n_clusters = int(np.max(groups) + 1)
    print('finished')
    return groups, n_clusters
def run_alg(G, alg, gamma=1.0, sample=1.0):
    '''
    Run a community detection algorithm with a resolution parameter.
    Currently only the RB configuration model is used for Louvain/Leiden.

    Parameters
    ----------
    G : igraph.Graph
    alg : str
        choose between 'louvain' and 'leiden'
    gamma : float
        resolution parameter
    sample : float
        if smaller than 1, randomly delete a fraction of edges each time

    Returns
    -------
    C : scipy.sparse.csr_matrix
        a matrix recording the membership of each cluster
    '''
    G1 = G.copy()
    if sample < 1:
        G1 = network_perturb(G, sample)
    if alg == 'louvain':
        partition_type = louvain.RBConfigurationVertexPartition
        partition = louvain.find_partition(G1, partition_type, resolution_parameter=gamma)
    elif alg == 'leiden':
        partition_type = leidenalg.RBConfigurationVertexPartition
        partition = leidenalg.find_partition(G1, partition_type, resolution_parameter=gamma)
    # partition = sorted(partition, key=len, reverse=True)
    LOGGER.info('Resolution: {:.4f}; found {} clusters'.format(gamma, len(partition)))
    return partition_to_membership_matrix(partition)
def surprise_communities(g, initial_membership=None, weights=None, node_sizes=None):
    """
    Surprise_communities is a model where the quality function to optimize is:

    .. math:: Q = m D(q \\parallel \\langle q \\rangle)

    where :math:`m` is the number of edges, :math:`q = \\frac{\\sum_c m_c}{m}` is the fraction of internal edges, :math:`\\langle q \\rangle = \\frac{\\sum_c \\binom{n_c}{2}}{\\binom{n}{2}}` is the expected fraction of internal edges, and finally :math:`D(x \\parallel y) = x \\ln \\frac{x}{y} + (1 - x) \\ln \\frac{1 - x}{1 - y}` is the binary Kullback-Leibler divergence.

    For directed graphs we can multiply the binomials by 2, and this leaves :math:`\\langle q \\rangle` unchanged, so that we can simply use the same formulation. For weighted graphs we can simply count the total internal weight instead of the total number of edges for :math:`q`, while :math:`\\langle q \\rangle` remains unchanged.

    :param g: a networkx/igraph object
    :param initial_membership: list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases this could be changed. Default None
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.surprise_communities(G)

    :References:

    Traag, V. A., Aldecoa, R., & Delvenne, J.-C. (2015). `Detecting communities using asymptotical surprise. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.92.022816/>`_ Physical Review E, 92(2), 022816. 10.1103/PhysRevE.92.022816

    .. note:: Reference implementation: https://github.com/vtraag/leidenalg
    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(g, leidenalg.SurpriseVertexPartition,
                                    initial_membership=initial_membership,
                                    weights=weights, node_sizes=node_sizes)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms, g, "Surprise",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights,
                              "node_sizes": node_sizes
                          })
def main(timestamp):
    nng = ig.read(f'/tmp/{timestamp}_leiden_graph.gml', format='graphml')
    partition = la.find_partition(nng, la.ModularityVertexPartition)
    clusters = partition.membership
    clusters = np.array(clusters).astype(str)
    np.savetxt(f'/tmp/{timestamp}_leiden_clusters.csv', clusters,
               delimiter=',', newline='\n', fmt='%s')
def __call__(self, orig_affinity_mat, initclusters):
    # assert there are no nan values in the data and the min affinity is >= 0
    if scipy.sparse.issparse(orig_affinity_mat):
        assert np.sum(np.isnan(orig_affinity_mat.data)) == 0
        assert np.min(orig_affinity_mat.data) >= 0,\
            np.min(orig_affinity_mat.data)
    else:
        assert np.sum(np.isnan(orig_affinity_mat)) == 0
        assert np.min(orig_affinity_mat) >= 0,\
            np.min(orig_affinity_mat)

    if (self.verbose):
        print("Beginning preprocessing + Leiden")
        print_memory_use()
        sys.stdout.flush()
    all_start = time.time()
    if (self.affmat_transformer is not None):
        affinity_mat = self.affmat_transformer(orig_affinity_mat)
    else:
        affinity_mat = orig_affinity_mat

    the_graph = get_igraph_from_adjacency(adjacency=affinity_mat)

    best_clustering = None
    best_quality = None

    if (self.verbose):
        toiterover = tqdm(range(self.numseedstotry))
    else:
        toiterover = range(self.numseedstotry)

    # if an initclustering is specified, we would want to try the Leiden
    # both with and without that initialization and take the one that
    # gets the best modularity
    initclusters_to_try_list = [None]
    if (initclusters is not None):
        initclusters_to_try_list.append(initclusters)

    for seed in toiterover:
        for initclusters_to_try in initclusters_to_try_list:
            partition = leidenalg.find_partition(
                the_graph, self.partitiontype,
                weights=(np.array(the_graph.es['weight'])
                         .astype(np.float64)),
                n_iterations=self.n_leiden_iterations,
                initial_membership=initclusters_to_try,
                seed=seed*100)
            quality = partition.quality()
            if ((best_quality is None) or (quality > best_quality)):
                best_quality = quality
                best_clustering = np.array(partition.membership)
    if (self.verbose):
        print("Quality:", best_quality)
        sys.stdout.flush()

    return ClusterResults(cluster_indices=best_clustering,
                          quality=best_quality)
def run_leiden(
    graph: sp.coo_matrix,
    directed: bool,
    partition_type: Optional[Type[MutableVertexPartition]],
    resolution_parameter: float,
    n_iterations: int,
    seed: Optional[int],
    use_weights: bool,
    kargs,
) -> Tuple[np.ndarray, float]:
    """
    Wrapper for leiden community detection

    Args:
        graph (sp.coo_matrix): Affinity matrix
        directed (bool): See below in 'cluster()'
        partition_type (Optional[Type[MutableVertexPartition]]): See below in 'cluster()'
        resolution_parameter (float): See below in 'cluster()'
        n_iterations (int): See below in 'cluster()'
        seed (Optional[int]): See below in 'cluster()'
        use_weights (bool): See below in 'cluster()'
        kargs: See below in 'cluster()'

    Returns:
        communities, Q (Tuple[np.ndarray, float]): See below in 'cluster()'
    """
    # convert the graph from scipy.sparse.coo_matrix to an igraph Graph object,
    # using the nonzero entries as the edge list
    edgelist = np.vstack(graph.nonzero()).T.tolist()
    g = ig.Graph(max(graph.shape), edgelist, directed=directed)
    # attach the affinity values as edge weights
    g.es["weights"] = graph.data

    if not partition_type:
        partition_type = leidenalg.RBConfigurationVertexPartition
    if resolution_parameter:
        kargs["resolution_parameter"] = resolution_parameter
    if use_weights:
        kargs["weights"] = np.array(g.es["weights"]).astype("float64")
    kargs["n_iterations"] = n_iterations
    kargs["seed"] = seed

    print("Running Leiden optimization", flush=True)
    tic_ = time.time()
    communities = leidenalg.find_partition(
        g,
        partition_type=partition_type,
        **kargs,
    )
    Q = communities.q
    print(
        "Leiden completed in {} seconds".format(time.time() - tic_),
        flush=True,
    )
    communities = np.asarray(communities.membership)

    return communities, Q
def rb_pots(g, initial_membership=None, weights=None, resolution_parameter=1):
    """
    Rb_pots is a model where the quality function to optimize is:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma \\frac{k_i k_j}{2m} \\right)\\delta(\\sigma_i, \\sigma_j)

    where :math:`A` is the adjacency matrix, :math:`k_i` is the (weighted) degree of node :math:`i`, :math:`m` is the total number of edges (or total edge weight), :math:`\\sigma_i` denotes the community of node :math:`i` and :math:`\\delta(\\sigma_i, \\sigma_j) = 1` if :math:`\\sigma_i = \\sigma_j` and `0` otherwise.

    For directed graphs a slightly different formulation is used, as proposed by Leicht and Newman:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma \\frac{k_i^\\mathrm{out} k_j^\\mathrm{in}}{m} \\right)\\delta(\\sigma_i, \\sigma_j),

    where :math:`k_i^\\mathrm{out}` and :math:`k_i^\\mathrm{in}` refer respectively to the outdegree and indegree of node :math:`i`, and :math:`A_{ij}` refers to an edge from :math:`i` to :math:`j`.

    Note that this is the same as the Leiden algorithm when setting :math:`\\gamma=1` and normalising by :math:`2m`, or :math:`m` for directed graphs.

    :param g: a networkx/igraph object
    :param initial_membership: list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param resolution_parameter: double >0 A parameter value controlling the coarseness of the clustering. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Default 1
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.rb_pots(G)

    :References:

    Reichardt, J., & Bornholdt, S. (2006). `Statistical mechanics of community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.74.016110/>`_ Physical Review E, 74(1), 016110. 10.1103/PhysRevE.74.016110

    Leicht, E. A., & Newman, M. E. J. (2008). `Community Structure in Directed Networks. <https://www.ncbi.nlm.nih.gov/pubmed/18517839/>`_ Physical Review Letters, 100(11), 118703. 10.1103/PhysRevLett.100.118703
    """

    if ig is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: install igraph to use the selected feature."
        )

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(g, leidenalg.RBConfigurationVertexPartition,
                                    resolution_parameter=resolution_parameter,
                                    initial_membership=initial_membership,
                                    weights=weights)
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms, g, "RB Pots",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights,
                              "resolution_parameter": resolution_parameter
                          })
def get_leiden_modularity(G):
    '''
    Accept a networkx graph, return the modularity of the best partition
    according to the Leiden algorithm.
    '''
    Gi = nx_to_ig(G)
    leid = leidenalg.find_partition(Gi, leidenalg.ModularityVertexPartition)
    m1 = leid.modularity
    return m1
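# --- usage sketch (illustration only, not from the original source) ---
# get_leiden_modularity() relies on the nx_to_ig() helper referenced above being
# available in the same module, plus networkx and leidenalg.
import networkx as nx

karate_nx = nx.karate_club_graph()
print(get_leiden_modularity(karate_nx))  # modularity of the Leiden partition (~0.4)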
def leiden_with_silhouette_score(X, leiden_nneighbors, skip_silhouette=False, leiden_iterations=10):
    """

    Parameters
    ----------
    X :

    leiden_nneighbors :

    skip_silhouette :
        (Default value = False)
    leiden_iterations :
        (Default value = 10)

    Returns
    -------

    """
    from sklearn.neighbors import kneighbors_graph
    from panopticon.utilities import get_igraph_from_adjacency
    from panopticon.utilities import import_check
    from sklearn.metrics import silhouette_score
    from collections import namedtuple

    exit_code = import_check("leidenalg", 'conda install -c conda-forge leidenalg')
    if exit_code != 0:
        return
    import leidenalg

    A = kneighbors_graph(X,
                         leiden_nneighbors,
                         mode='connectivity',
                         include_self=True,
                         metric='cosine')
    ig = get_igraph_from_adjacency(A)
    part = leidenalg.find_partition(ig,
                                    leidenalg.RBConfigurationVertexPartition,
                                    n_iterations=leiden_iterations,
                                    seed=17)
    clustering = part.membership
    if skip_silhouette:
        score = None
    else:
        score = silhouette_score(
            X,
            clustering,
            metric='cosine',
        )
    leiden_silhouette_output = namedtuple("LeidenSilhouetteOutput",
                                          "score nneighbors clustering")
    return leiden_silhouette_output(score, leiden_nneighbors, clustering)
def rber_pots(g, initial_membership=None, weights=None, node_sizes=None, resolution_parameter=1):
    """
    rber_pots is a Leiden model where the quality function to optimize is:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma p \\right)\\delta(\\sigma_i, \\sigma_j)

    where :math:`A` is the adjacency matrix, :math:`p = \\frac{m}{\\binom{n}{2}}` is the overall density of the graph, :math:`\\sigma_i` denotes the community of node :math:`i`, :math:`\\delta(\\sigma_i, \\sigma_j) = 1` if :math:`\\sigma_i = \\sigma_j` and `0` otherwise, and, finally :math:`\\gamma` is a resolution parameter.

    :param g: a networkx/igraph object
    :param initial_membership: list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases this could be changed. Default None
    :param resolution_parameter: double >0 A parameter value controlling the coarseness of the clustering. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Default 1
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.rber_pots(G)

    :References:

    Reichardt, J., & Bornholdt, S. (2006). `Statistical mechanics of community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.74.016110/>`_ Physical Review E, 74(1), 016110. 10.1103/PhysRevE.74.016110

    .. note:: Reference implementation: https://github.com/vtraag/leidenalg
    """

    g = convert_graph_formats(g, ig.Graph)

    part = leidenalg.find_partition(
        g,
        leidenalg.RBERVertexPartition,
        resolution_parameter=resolution_parameter,
        initial_membership=initial_membership,
        weights=weights,
        node_sizes=node_sizes,
    )
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms, g, "RBER Pots",
                          method_parameters={
                              "initial_membership": initial_membership,
                              "weights": weights,
                              "node_sizes": node_sizes,
                              "resolution_parameter": resolution_parameter
                          })
def get_leiden(mknn, min_cluster_size=10, resolution_parameter=1.0, seed=0, n_iterations=5):
    g = ig.Graph(n=mknn.shape[0],
                 edges=list(zip(mknn.row, mknn.col)),
                 directed=False)
    part = leidenalg.find_partition(g,
                                    leidenalg.RBConfigurationVertexPartition,
                                    seed=seed,
                                    n_iterations=n_iterations,
                                    resolution_parameter=resolution_parameter,
                                    )
    return CellLabels(clean_labels(part.membership, min_cluster_size=min_cluster_size))
def leiden(gr, resolution=1, optimizer="RB", seed=12343):
    weights = gr.es["weight"]
    if (optimizer == "RB"):
        algo = la.RBConfigurationVertexPartition
    elif (optimizer == "CPM"):
        algo = la.CPMVertexPartition
    else:
        raise ValueError("optimizer must be 'RB' or 'CPM'")
    partition = la.find_partition(gr, algo,
                                  n_iterations=10,
                                  seed=seed,
                                  resolution_parameter=resolution,
                                  weights=weights)
    return partition
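# --- usage sketch (illustration only, not from the original source) ---
# The leiden() wrapper defined directly above needs an igraph.Graph carrying a
# 'weight' edge attribute, and assumes `import leidenalg as la` at module level.
import igraph as ig

wg = ig.Graph.Famous('Zachary')
wg.es['weight'] = [1.0] * wg.ecount()
cpm_partition = leiden(wg, resolution=0.5, optimizer="CPM")
print(len(cpm_partition))  # number of communities found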
def communities_leiden(graph):
    graphi = nx_to_ig(graph)
    partition = la.find_partition(graphi, la.ModularityVertexPartition)
    dictionary = get_id_to_title(graph)
    res = translate_leiden_to_dict(partition, graphi, dictionary)
    print('detected', len(res), 'communities')
    return res
def leiden_clustering(adjacency, res=1.0, directed=False, part=None):
    import leidenalg
    g, weights = generate_igraph(adjacency, directed=directed)
    if part is None:
        part = leidenalg.find_partition(
            g, leidenalg.RBConfigurationVertexPartition,
            resolution_parameter=res)
        part = part.membership
    modularity = g.modularity(part, weights=weights)
    return part, modularity
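# --- usage sketch (illustration only, not from the original source) ---
# leiden_clustering() depends on the generate_igraph() helper used above; this call
# assumes that helper accepts a scipy sparse adjacency matrix and returns an
# (igraph.Graph, edge weights) pair, which may differ in the actual codebase.
import numpy as np
from scipy.sparse import csr_matrix

toy_adjacency = csr_matrix(np.array([[0, 1, 1, 0],
                                     [1, 0, 1, 0],
                                     [1, 1, 0, 1],
                                     [0, 0, 1, 0]], dtype=float))
membership, modularity = leiden_clustering(toy_adjacency, res=1.0)
print(membership, modularity)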
def cpm(g_original, initial_membership=None, weights=None, node_sizes=None, resolution_parameter=1):
    """
    CPM is a model where the quality function to optimize is:

    .. math:: Q = \\sum_{ij} \\left(A_{ij} - \\gamma \\right)\\delta(\\sigma_i, \\sigma_j)

    where :math:`A` is the adjacency matrix, :math:`\\sigma_i` denotes the community of node :math:`i`, :math:`\\delta(\\sigma_i, \\sigma_j) = 1` if :math:`\\sigma_i = \\sigma_j` and `0` otherwise, and, finally :math:`\\gamma` is a resolution parameter.

    The internal density of communities

    .. math:: p_c = \\frac{m_c}{\\binom{n_c}{2}} \\geq \\gamma

    is higher than :math:`\\gamma`, while the external density :math:`p_{cd} = \\frac{m_{cd}}{n_c n_d} \\leq \\gamma` is lower than :math:`\\gamma`. In other words, choosing a particular :math:`\\gamma` corresponds to choosing to find communities of a particular density, and as such defines communities. Finally, the definition of a community is in a sense independent of the actual graph, which is not the case for any of the other methods.

    :param g_original: a networkx/igraph object
    :param initial_membership: list of int Initial membership for the partition. If :obj:`None` then defaults to a singleton partition. Default None
    :param weights: list of double, or edge attribute Weights of edges. Can be either an iterable or an edge attribute. Default None
    :param node_sizes: list of int, or vertex attribute Sizes of nodes are necessary to know the size of communities in aggregate graphs. Usually this is set to 1 for all nodes, but in specific cases this could be changed. Default None
    :param resolution_parameter: double >0 A parameter value controlling the coarseness of the clustering. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Default 1
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.cpm(G)

    :References:

    Traag, V. A., Van Dooren, P., & Nesterov, Y. (2011). `Narrow scope for resolution-limit-free community detection. <https://journals.aps.org/pre/abstract/10.1103/PhysRevE.84.016114/>`_ Physical Review E, 84(1), 016114. 10.1103/PhysRevE.84.016114

    .. note:: Reference implementation: https://github.com/vtraag/leidenalg
    """

    if ig is None:
        raise ModuleNotFoundError("Optional dependency not satisfied: install igraph to use the selected feature.")

    g = convert_graph_formats(g_original, ig.Graph)

    part = leidenalg.find_partition(g, leidenalg.CPMVertexPartition,
                                    resolution_parameter=resolution_parameter,
                                    initial_membership=initial_membership,
                                    weights=weights, node_sizes=node_sizes,
                                    )
    coms = [g.vs[x]['name'] for x in part]
    return NodeClustering(coms, g_original, "CPM",
                          method_parameters={"initial_membership": initial_membership,
                                             "weights": weights, "node_sizes": node_sizes,
                                             "resolution_parameter": resolution_parameter})
def communityFromGraph(ig):
    """
    Calculates communities for a given igraph object
    (loading the graph from a pickled edge list is commented out below)
    """
    #dfile = make_graph_from_dict(gfile)
    #ig = pickle.load(open(dfile, 'rb'))#getIgraph(dfile)
    partition = la.find_partition(ig, la.ModularityVertexPartition)
    comm_dict = {}
    for p in range(len(partition)):
        comm_dict[p] = ig.vs.select(partition[p])['name']
    print("Found", len(comm_dict), 'communities for the primary interactome')
    return comm_dict
def louvain_clusters(latent, k=10, rands=0):
    # build a k-nearest-neighbour graph on the latent space
    nn_matrix = kneighbors_graph(latent, k)
    rows, cols = np.where(nn_matrix.todense() == 1)
    edges = [(row, col) for row, col in zip(rows, cols)]
    g = ig.Graph()
    g.add_vertices(latent.shape[0])
    g.add_edges(edges)
    # note: despite the function name, this runs leidenalg's modularity optimisation
    res = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition, seed=rands)
    clusters = np.asarray(res.membership)
    return clusters
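# --- usage sketch (illustration only, not from the original source) ---
# louvain_clusters() assumes numpy (np), igraph (ig), leidenalg and sklearn's
# kneighbors_graph are imported at module level as in the function above; the
# latent matrix here is random data just to show the call.
import numpy as np

rng = np.random.RandomState(0)
latent = rng.normal(size=(200, 10))
cluster_ids = louvain_clusters(latent, k=15, rands=0)
print(np.unique(cluster_ids))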
def community_detection(input, output):
    print("Loading graph...")
    graph = Graph.Read_Pickle(input)
    print("Partitioning...")
    part = leidenalg.find_partition(graph, leidenalg.ModularityVertexPartition)
    print("Saving partition...")
    with open(output, "w") as file:
        for partition in part:
            file.write(repr(partition))
            file.write("\n")
def communities_lei(mob):
    G = od_igraph(mob)
    partition = leidenalg.find_partition(G,
                                         leidenalg.ModularityVertexPartition,
                                         n_iterations=2,
                                         weights='weight')
    cluster = []
    for i, part in enumerate(partition):
        df = pd.DataFrame({'quadkey': G.vs()[part]['name']})
        df['cluster'] = i
        cluster.append(df)
    return pd.concat(cluster)
def identify_communities_leidenalg(net):
    giant = get_largest_component(net)
    comms = leidenalg.find_partition(giant, leidenalg.ModularityVertexPartition)
    comm_list = comms.subgraphs()  # communities in the current level
    print('Number of communities identified:', len(comm_list))
    net_copy = net.copy()
    net_copy.vs['community'] = "-1"
    for idx, comm in enumerate(comm_list):
        for v1 in comm.vs:
            v2 = net_copy.vs.find(name=v1['name'])
            v2['community'] = str(idx + 1)
    return net_copy