def _graph_community(G):  # unused function
    '''Partition the corpus coupling graph G at two levels of its dendrogram.

    The top-level Louvain communities are computed first. Every community
    holding more than SIZECUT nodes is partitioned once more on its induced
    subgraph, and its sub-communities are labelled
    ``(community_id + 1) * 1000 + sub_community_id``. Nodes of the other
    communities keep their top-level label shifted by one.

    Author: Sebastian Grauwin (http://sebastian-grauwin.com/bibliomaps/)

    Args:
        G (networkx object): corpus coupling graph.

    Returns:
        louvain_partition (dict): maps each dendrogram level index to the
            partition of G at that level (labels of the top level start at 1
            instead of 0), plus the key ``len(dendrogram)`` which holds the
            size-limited partition described above.
    '''
    # 3rd party import
    import community as community_louvain

    # TO DO: move SIZECUT in COUPL_GLOBAL_VALUES if _graph_community is used
    SIZECUT = 10  # Upper limit of size communities

    dendrogram, part, _ = _runpythonlouvain(G)

    # Group the nodes of each community in a single pass over the partition.
    nodes_by_community = {}
    for node, community_id in part.items():
        nodes_by_community.setdefault(community_id, []).append(node)

    part2 = part.copy()
    for community_id in set(part.values()):
        list_nodes = nodes_by_community[community_id]
        if len(list_nodes) > SIZECUT:
            # Split clusters of size > SIZECUT with one Louvain run on the
            # induced subgraph (the former extra run, whose result was
            # discarded, has been dropped).
            H = G.subgraph(list_nodes).copy()
            dendo2 = community_louvain.generate_dendrogram(H, part_init=None)
            partfoo = community_louvain.partition_at_level(
                dendo2, len(dendo2) - 1)
            # Add the prefix code identifying the parent community.
            prefix = (community_id + 1) * 1000
            for node in partfoo:
                partfoo[node] += prefix
            part2.update(partfoo)
        else:
            # Communities of at most SIZECUT nodes: shift the label as well.
            for node in list_nodes:
                part2[node] += 1

    # ... save partitions
    top_level = len(dendrogram) - 1
    louvain_partition = {
        lev: community_louvain.partition_at_level(dendrogram, lev)
        for lev in range(len(dendrogram))
    }

    # .. set community labels starting from 1 instead of 0 for top level
    for node in louvain_partition[top_level]:
        louvain_partition[top_level][node] += 1

    louvain_partition[len(dendrogram)] = part2
    return louvain_partition
def communityMining(G, minCommSize=10):
    """
    Find communities in the graph 'G' with more than 'minCommSize' nodes.

    The graph is partitioned twice. Nodes belonging to a first-level community
    of fewer than 'minCommSize' nodes are removed from 'G' (the caller's graph
    is modified in place), then the pruned graph is partitioned again and the
    partition at level ``len(dendrogram) - 2`` is used.

    Only nodes whose 'bipartite' attribute equals 0 or 1 are placed in the
    returned subgraphs.

    Returns a list of subgraphs of 'G', one per community of more than
    'minCommSize' nodes.
    """
    dendrogram = community.generate_dendrogram(G)
    firstPartition = community.partition_at_level(dendrogram, 0)

    sys.stderr.write("Prune sparse clusters. ")
    # Remove early small communities.
    sparseComm = {
        com for com, size in Counter(firstPartition.values()).items()
        if size < minCommSize
    }
    G.remove_nodes_from(
        [node for node in G.nodes() if firstPartition[node] in sparseComm])

    sys.stderr.write("Find communities. ")
    # Partition the graph again and report the big communities.
    dendrogram = community.generate_dendrogram(G)
    partition = community.partition_at_level(dendrogram, len(dendrogram) - 2)

    # Collect the members of each community in a single pass.
    members = {}
    for node, data in G.nodes(data=True):
        if data['bipartite'] in (0, 1):
            members.setdefault(partition[node], []).append(node)

    communities = []
    for com, size in Counter(partition.values()).items():
        if size <= minCommSize:
            continue
        comm = G.subgraph(members.get(com, []))
        if comm.order() < minCommSize:
            sys.stderr.write(
                "Remove small community (This shouldn't happen here?)\n")
            continue
        communities.append(comm)

    return communities
def apply_community_louvain(G):
    """Print and return the top-level Louvain communities of G.

    Communities containing the start node or the end node (as reported by
    ``get_start_and_end_nodes``) are left out.

    Returns:
        list of lists of nodes, one inner list per reported community.
    """
    start_node_id, end_node_id = get_start_and_end_nodes(G)

    dendo = community_louvain.generate_dendrogram(G)
    highest_partition = community_louvain.partition_at_level(
        dendo, len(dendo) - 1)

    # Group nodes by community in one pass instead of rescanning per community.
    members = {}
    for node, community_number in highest_partition.items():
        members.setdefault(community_number, []).append(node)

    print("Communities;")
    list_of_communities = []
    for community_number in set(highest_partition.values()):
        community_items = members[community_number]
        if start_node_id in community_items or end_node_id in community_items:
            continue
        list_of_communities.append(community_items)
        print(f"Community number {len(list_of_communities)}: {community_items}")
    return list_of_communities
def get_community_assignment(in_df, graph, dendrogram):
    '''
    Add one community-id column per dendrogram level to a copy of in_df.

    For level i the column is named 'cid<i>' and holds, for every index value
    of the dataframe, the community that the partition at level i assigns to
    it.

    in_df: Dataframe. Must be indexed by user_id.
    graph: Networkx Graph. Node IDs should match user_ids in dataframe
    dendrogram: List of dictionaries, each dictionary mapping user_id to
        community_id. Each dictionary should represent a level of the
        clustering hierarchy.

    return: Tuple of (copy of the Dataframe with the community id columns
        added, dictionary mapping each level to its modularity (float))
    '''
    out_df = in_df.copy()
    modularity_by_level = {}

    for level, _ in enumerate(dendrogram):
        level_partition = partition_at_level(dendrogram, level)

        # Infrequently the community detection leaves out a user ID or two
        # (still being investigated); such IDs are placed into community 0.
        column_name = 'cid' + str(level)
        out_df[column_name] = [
            level_partition.get(user_id, 0) for user_id in out_df.index
        ]

        modularity_by_level[level] = modularity(level_partition, graph)

    return out_df, modularity_by_level
def external_ec_coarsening(graph, sfdp_path, coarsening_scheme = 2, c_type = 'original'):
    """Coarsen `graph` with the original scheme or with Louvain.

    With ``c_type == 'original'`` the call is forwarded to
    ``original_coarsening``. With ``c_type == 'louvain'`` each level of the
    Louvain dendrogram is written to 'induced<level>.edgelist' in the current
    directory as a weighted edge list and loaded back through magicgraph.

    Returns:
        For 'louvain': ``(coarse_graphs, merges)`` where ``coarse_graphs``
        starts with the input graph followed by one graph per dendrogram level
        and ``merges`` holds the partition of each level.
        For 'original': whatever ``original_coarsening`` returns.
        For any other ``c_type``: None.
    """
    if c_type == 'original':
        return original_coarsening(graph, sfdp_path, coarsening_scheme)
    if c_type != 'louvain':
        return None

    print("Coarsening with Louvain")
    matrix = magicgraph.to_adjacency_matrix(graph)
    nx_graph = nx.from_scipy_sparse_matrix(matrix)
    dendro = community.generate_dendrogram(nx_graph)

    coarse_graphs = [DoubleWeightedDiGraph(graph)]
    merges = []
    for level_index in range(len(dendro)):
        level = community.partition_at_level(dendro, level_index)
        induced = community.induced_graph(level, nx_graph)
        filename = 'induced' + str(level_index) + '.edgelist'

        # Write the weighted induced graph; the context manager closes the
        # file even when writing fails.
        with open(filename, 'w') as f:
            for u, v, a in induced.edges.data('weight', default = 1):
                f.write(' '.join([str(u), str(v), str(a)]) + '\n')

        m_graph = magicgraph.load_weighted_edgelist(filename, undirected = True)
        coarse_graphs.append(DoubleWeightedDiGraph(m_graph))
        merges.append(level)
        print('Level: ', level_index, 'N nodes: ', m_graph.number_of_nodes())

    return coarse_graphs, merges
def louvain_community_detection(networkx_graph):
    """
    Run Louvain community detection and return the partition at level 0
    of the resulting dendrogram.

    :param networkx_graph: graph handed to ``cm.generate_dendrogram``; the
        'weight' edge attribute is used as weight
    :return: the partition returned by ``cm.partition_at_level`` for level 0
    """
    dendrogram = cm.generate_dendrogram(
        networkx_graph,
        randomize=True,
        weight='weight',
    )
    first_level = 0
    return cm.partition_at_level(dendrogram, first_level)
def louvianClustering(self, similarity_measure_list):
    """Cluster items with Louvain from pairwise similarity triples.

    Args:
        similarity_measure_list: iterable of ``(f1, f2, val)`` triples; a
            weighted edge f1-f2 is created for every triple whose ``float(val)``
            is greater than the threshold (currently 0).

    Returns:
        List of clusters, each a list of nodes. The same list is stored in
        ``self.cluster_set``.
    """
    thresh = 0  # self.getThreshold(similarity_measure_list)

    edge_list = []
    node_set = set()
    for f1, f2, val in similarity_measure_list:
        weight = float(val)
        if weight > thresh:
            edge_list.append((f1, f2, weight))
            node_set.update((f1, f2))

    G = nx.Graph()
    G.add_nodes_from(node_set)
    G.add_weighted_edges_from(edge_list)

    # Only the best partition is needed; the dendrogram and modularity that
    # used to be computed here were never read (and the modularity call could
    # raise on a graph without edges).
    partition = community.best_partition(G)

    # Group nodes by cluster id in one pass (dict.items works on Python 2 and 3).
    members = {}
    for node, cluster in partition.items():
        members.setdefault(cluster, []).append(node)

    cluster_set_elements = [
        members[cluster_id] for cluster_id in set(partition.values())
    ]
    self.cluster_set = cluster_set_elements
    return cluster_set_elements
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    """Compute Louvain communities of the graph given by an adjacency matrix.

    Args:
        adj_matrix: adjacency matrix passed to ``nx.from_numpy_array``.
        level: depth counted from the top of the dendrogram; 0 selects the
            last dendrogram level, ``len(dendro) - 1`` the first one.
        return_c_graph: when True, also return the adjacency matrix of the
            community-level (induced) graph.

    Returns:
        ``(community_list, c_level_graph)`` where ``community_list`` holds one
        list of nodes per community, ordered by community id, and
        ``c_level_graph`` is None unless ``return_c_graph`` is True.

    Raises:
        Exception: if ``level`` is deeper than the dendrogram.
    """
    nx_G = nx.from_numpy_array(adj_matrix)
    dendro = louvain.generate_dendrogram(nx_G, randomize=False)  # Maybe set randomize True

    dendro_level = len(dendro) - level - 1
    if dendro_level < 0:
        raise Exception("The given Level is too deep. The maximum is: " + str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, dendro_level)

    # Group nodes per community id. The earlier code derived the number of
    # communities from a node id (max over keys) instead of from the
    # community ids, which could skip communities.
    members = {}
    for node, community_id in communities.items():
        members.setdefault(community_id, []).append(node)
    community_list = [members[community_id] for community_id in sorted(members)]

    c_level_graph = None
    if return_c_graph:
        community_level_G = louvain.induced_graph(communities, nx_G)
        c_level_graph = nx.adjacency_matrix(community_level_G)

    return community_list, c_level_graph
def updateLabels(self, level):
    """Store in ``self.labels`` the community labels at a dendrogram level.

    The Louvain algorithm labels communities at several levels (a
    dendrogram). ``level`` is multiplied by the index of the last level and
    truncated to an integer to choose the level; presumably it is a fraction
    in [0, 1] (TODO confirm with callers).
    """
    last_index = len(self.dendrogram) - 1
    level = int(last_index * level)

    partition = community_louvain.partition_at_level(self.dendrogram, level)

    # Dictionary values -> numpy array, in the dictionary's iteration order.
    label_values = list(partition.values())
    self.labels = np.array(label_values)
def test_modularity_increase(self):
    """
    Build a dendogram of a random graph and check that the modularity of
    its successive levels never decreases
    """
    graph = nx.erdos_renyi_graph(1000, 0.01)
    dendogram = co.generate_dendogram(graph)

    modularities = []
    for level in range(len(dendogram)):
        level_partition = co.partition_at_level(dendogram, level)
        modularities.append(co.modularity(level_partition, graph))

    self.assertListEqual(modularities, sorted(modularities))
def get_covered_entities(self, entity, level):
    """Return every entity sharing `entity`'s community at `level`.

    The result includes `entity` itself. A KeyError is raised when `entity`
    is not part of the partition at that level.
    """
    partition = community.partition_at_level(self.dendrogram, level)
    interesting_partition_code = partition[entity]

    # dict.items() works on both Python 2 and 3 (iteritems is Python 2 only);
    # a distinct loop name avoids shadowing the `entity` argument.
    return [
        member for member, partition_code in partition.items()
        if partition_code == interesting_partition_code
    ]
def _runpythonlouvain(G):  # unused function
    '''Run the Louvain algorithm NRUNS times on G and keep the best run.

    Each run partitions the corpus coupling graph G at level
    "len(foo_dendrogram) - 1" of its dendrogram
    (see https://buildmedia.readthedocs.org/media/pdf/python-louvain/latest/python-louvain.pdf).
    The run with the highest modularity is kept.

    Author: Sebastian Grauwin (http://sebastian-grauwin.com/bibliomaps/)

    Args:
        G (networkx object): corpus coupling graph.

    Returns:
        results (namedtuple): (dendrogram, partition, modularity) where
            dendrogram (list of dict): a list of partitions, ie dictionaries
                where keys of the i+1 dict are the values of the i dict;
            partition (dict): Louvain partition of G where dict keys are the
                pub IDs and the dict values are the community IDs;
            modularity (float): modularity of the kept partition.
    '''
    # standard library imports
    from collections import namedtuple

    # 3rd party import
    import community as community_louvain

    # TO DO: move NRUNS in COUPL_GLOBAL_VALUES if _runpythonlouvain is used.
    NRUNS = 1  # number of times the louvain algorithm is run for a given
    # network, the best partition being kept.

    named_tup_results = namedtuple('results', ['dendrogram', 'partition', 'modularity'])

    max_modularity = -1
    for run in range(NRUNS):
        if NRUNS > 1:
            print(f'......run {run + 1}/{NRUNS}')
        foo_dendrogram = community_louvain.generate_dendrogram(G, part_init=None)
        partition_foo = community_louvain.partition_at_level(
            foo_dendrogram, len(foo_dendrogram) - 1)
        modularity = community_louvain.modularity(partition_foo, G)
        if modularity > max_modularity:
            max_modularity = modularity
            partition = partition_foo.copy()
            dendrogram = foo_dendrogram.copy()

    # Report the modularity of the kept (best) run, not of the last run.
    return named_tup_results(dendrogram, partition, max_modularity)
def busmap_by_louvain(network, level=-1):
    """Map each bus of `network` to a Louvain community.

    An undirected graph is built with one node per bus and one edge per
    line, weighted by ``1 / x`` of the line. A negative `level` counts from
    the end of the dendrogram (``-1`` is the last level).

    Returns a pandas Series indexed by ``network.buses.index`` whose values
    are the community ids.
    """
    line_weights = 1./network.lines.x
    endpoints = network.lines.loc[:, ['bus0', 'bus1']]
    lines = endpoints.assign(weight=line_weights).set_index(['bus0', 'bus1'])

    G = nx.Graph()
    G.add_nodes_from(network.buses.index)
    weighted_edges = (
        (u, v, dict(weight=w)) for (u, v), w in lines.itertuples()
    )
    G.add_edges_from(weighted_edges)

    dendrogram = community.generate_dendrogram(G)
    if level < 0:
        level = level + len(dendrogram)

    bus_to_community = community.partition_at_level(dendrogram, level=level)
    return pd.Series(bus_to_community, index=network.buses.index)
def test_modularity_increase(self):
    """
    Build a dendrogram of a random graph and check that the modularity of
    its successive levels never decreases
    """
    random_graph = nx.erdos_renyi_graph(1000, 0.01)
    dendrogram = co.generate_dendrogram(random_graph)

    modularities = []
    for level in range(len(dendrogram)):
        level_partition = co.partition_at_level(dendrogram, level)
        modularities.append(co.modularity(level_partition, random_graph))

    self.assertListEqual(modularities, sorted(modularities))
def Mod(G,usebest=True,l=1):
    """Tag each node of G with its community and return the modularity.

    The partition is computed on an undirected copy of G. With `usebest`
    the last dendogram level is used, otherwise level `l`. The community id
    is written to the node attribute 'm' of G.
    """
    undirected = G.to_undirected()
    dendo = community.generate_dendogram(undirected, None)

    level = len(dendo) - 1 if usebest else l

    partition = community.partition_at_level(dendo, level)
    mod = community.modularity(partition, undirected)

    for node in G:
        G.node[node]['m'] = partition[node]
    return mod
def __init__(self, directed_graph):
    """Detect communities of `directed_graph` and rank them.

    Louvain runs on an undirected version of the graph; the last dendogram
    level gives the partition. The large communities are turned into
    community graphs, which are then ranked with ``_pagerank_communities``.
    """
    self.directed_graph = directed_graph

    undirected_graph = self.directed_graph.to_undirected()
    dendogram = community.generate_dendogram(undirected_graph)
    last_level = len(dendogram) - 1
    partitions = community.partition_at_level(dendogram, last_level)

    communities = self._get_communities(partitions)
    major_communities = self._get_large_communities(communities)

    self.community_graphs = self._build_community_graphs(
        communities,
        valid_communities=major_communities,
    )
    self.community_rankings = self._pagerank_communities(self.community_graphs)
def partition(G):
    """Write the Louvain partition tree of G to JSON files.

    The dendrogram is computed on an undirected copy of G with the
    module-level resolution ``part_para``. The partition at every level is
    dumped to "partition.json"; the same is done for the dendrogram returned
    by ``sort_partition`` in "sorted_partition.json".

    Returns:
        The number of levels of the (unsorted) dendrogram.
    """
    undirected_G = G.to_undirected()
    dendo = com.generate_dendrogram(undirected_G, None, weight='weight', resolution=part_para)

    pdendo = [com.partition_at_level(dendo, i) for i in range(len(dendo))]
    # The context manager flushes and closes the file right after the dump.
    with open("partition.json", "w+") as outfile:
        json.dump(pdendo, outfile)
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(" Check File\"partition.json\" for the partition tree.")

    sdendo = sort_partition(G, dendo)
    pdendo = [com.partition_at_level(sdendo, i) for i in range(len(sdendo))]
    with open("sorted_partition.json", "w+") as outfile:
        json.dump(pdendo, outfile)
    print(" Check File\"sorted_partition.json\" for the sorted partition tree.")

    return len(dendo)
def louvain(graph):
    """
    Louvain clustering.

    Returns a dictionary whose keys are clustering levels and whose values
    are the clusterings at that level, in the form produced by the
    to_clusters_dict method. Levels 0 to len(dendrogram) - 2 are included;
    the last dendrogram level is not.
    """
    community.__MIN = 1e-12
    dendo = community.generate_dendrogram(graph)

    # partition_at_level gives a dictionary mapping each node to the set
    # (community) it belongs to; to_clusters_dict converts it.
    return {
        level: to_clusters_dict(community.partition_at_level(dendo, level))
        for level in range(len(dendo) - 1)
    }
def modularize(edgeGraph, nodeDf, nameOfModularityColumn=u'Community_Lvl_0'):
    '''
    Assign a Louvain community to every node of the node data frame.

    The dendrogram is built from edgeGraph using the 'weight' edge attribute
    and the partition of its last level is mapped onto the 'Id' column of
    nodeDf, which is modified in place.

    returns the tuple (nodeDfCleaner(nodeDf), dendrogram)
    '''
    # compute the partition of the last dendrogram level
    dendrogram = community.generate_dendrogram(edgeGraph, weight='weight')
    lastLevel = len(dendrogram) - 1
    communityByNode = community.partition_at_level(dendrogram, lastLevel)

    # make sure the community column exists before filling it
    if nameOfModularityColumn not in nodeDf.columns:
        nodeDf[nameOfModularityColumn] = np.nan

    # fill in the community of each node
    nodeIds = nodeDf[u'Id']
    nodeDf[nameOfModularityColumn] = nodeIds.map(communityByNode)

    # clean the frame (removal of remaining NaN is delegated to nodeDfCleaner)
    cleanedDf = nodeDfCleaner(nodeDf)
    return cleanedDf, dendrogram
def louvain(graph):
    """
    Louvain clustering.

    Returns a dictionary keyed by clustering level; each value is the
    clustering of that level as returned by the to_clusters_dict method.
    The last level of the dendrogram is not part of the result.
    """
    community.__MIN = 1e-12
    dendrogram = community.generate_dendrogram(graph)
    level_count = len(dendrogram) - 1

    clusters_by_level = {}
    level = 0
    while level < level_count:
        # node -> community dictionary of this level
        node_to_community = community.partition_at_level(dendrogram, level)
        clusters_by_level[level] = to_clusters_dict(node_to_community)
        level += 1
    return clusters_by_level
def run_louvain(experiment_dir):
    """Print the level-0 Louvain communities of the projection graph.

    Reads 'projection.txt' from `experiment_dir` as a directed edge list with
    the float attributes 'weight', 'p_prereq' and 'p_course', converts it to
    an undirected graph and prints the members of each community, ordered by
    community id.
    """
    g = nx.read_edgelist(os.path.join(experiment_dir, 'projection.txt'),
                         create_using=nx.DiGraph,
                         data=[('weight', float), ('p_prereq', float), ('p_course', float)])
    g = g.to_undirected()
    d = community.generate_dendrogram(g)
    level_0 = community.partition_at_level(d, 0)

    # Group in one pass. The earlier range(max(...)) loop skipped the
    # community with the highest id and failed on an empty partition.
    majors = {}
    for class_id, partition_num in level_0.items():
        majors.setdefault(partition_num, []).append(class_id)

    for partition_num in sorted(majors):
        print('=' * 40)
        print(majors[partition_num])
        print('=' * 40)
def study_dendrogram(G, filename):
    """Plot the modularity of every level of the Louvain dendrogram of G.

    Prints the number of levels and the number of communities per level.
    When `filename` is truthy the figure is saved under "drawings/" with that
    name before being shown.

    Returns the dendrogram.
    """
    dendrogram = co.generate_dendrogram(G)
    print("Dendrogram has {} levels".format(len(dendrogram)))

    levels = []
    modularities = []
    for level in range(len(dendrogram)):
        part = co.partition_at_level(dendrogram, level)
        community_count = len(set(part.values()))
        print("Found {} communities at level {}".format(community_count, level))
        levels.append(level)
        modularities.append(co.modularity(part, G))

    plt.plot(levels, modularities, linestyle='dotted', marker = 'o', markersize=8)
    plt.xlabel("l - Level")
    plt.ylabel("Q - Modularity")
    if filename:
        plt.savefig("drawings/"+filename)
    plt.show()
    return dendrogram
def test_nodes_stay_together(self):
    """
    Test that two nodes in the same community at one level stay in the same
    community at the next higher level
    """
    g = nx.erdos_renyi_graph(500, 0.01)
    dendo = co.generate_dendogram(g)

    parts = {
        level: co.partition_at_level(dendo, level)
        for level in range(len(dendo))
    }

    for level in range(len(dendo) - 1):
        p1 = parts[level]
        p2 = parts[level + 1]
        for com in set(p1.values()):
            # dict.items() works on both Python 2 and 3.
            comhigher = [p2[node] for node, comnode in p1.items()
                         if comnode == com]
            self.assertEqual(len(set(comhigher)), 1)
def add_cluster_labels_to_nodes(nodes_pdf, edges_pdf, weight_col='lift'):
    """
    Add Louvain cluster columns to nodes_pdf (as a side effect).

    A weighted graph is built from the 'from' and 'to' columns of edges_pdf
    with weight_col as edge weight. For every dendrogram level a column named
    "level_<n>_cluster" is added to nodes_pdf, holding the cluster of each
    value of the 'id' column. Nothing is returned.
    """
    import networkx as nx

    weighted_edges = []
    for _, row in edges_pdf.iterrows():
        weighted_edges.append((row['from'], row['to'], row[weight_col]))

    G = nx.Graph()
    G.add_weighted_edges_from(weighted_edges)

    dendro = community_louvain.generate_dendrogram(G)
    level = 0
    while level < len(dendro):
        partition = community_louvain.partition_at_level(dendro, level)
        column = f"level_{level}_cluster"
        nodes_pdf[column] = [partition[node_id] for node_id in nodes_pdf['id']]
        level += 1
def get_communities_level(self, level, relevant_entities=None):
    """Return the main entity of every community at `level`.

    The main entity of a community is the one with the largest
    ``self._get_weight`` value; on ties the entity met first is kept. When
    `relevant_entities` is given, only entities contained in it are
    considered.

    Returns a dict mapping community code to its main entity.
    """
    communities_main_entities = {}
    partition = community.partition_at_level(self.dendrogram, level)

    # dict.items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for entity, partition_code in partition.items():
        if relevant_entities is not None and entity not in relevant_entities:
            continue

        if partition_code not in communities_main_entities:
            communities_main_entities[partition_code] = entity
            continue

        new_entity_weight = self._get_weight(entity)
        current_entity = communities_main_entities[partition_code]
        current_entity_weight = self._get_weight(current_entity)
        if current_entity_weight < new_entity_weight:
            communities_main_entities[partition_code] = entity

    return communities_main_entities
def run_louvain(experiment_dir):
    """Write the level-0 Louvain communities to 'louvain.json'.

    The graph comes from ``get_networkx_graph(experiment_dir)`` and is made
    undirected. The JSON file, written into `experiment_dir`, contains one
    list of members per community, ordered by community id.
    """
    g = get_networkx_graph(experiment_dir)
    g = g.to_undirected()
    d = community.generate_dendrogram(g)
    level_0 = community.partition_at_level(d, 0)

    # Group in one pass. The earlier range(max(...)) loop skipped the
    # community with the highest id and failed on an empty partition.
    members = {}
    for class_id, partition_num in level_0.items():
        members.setdefault(partition_num, []).append(class_id)
    majors = [members[partition_num] for partition_num in sorted(members)]

    with open(os.path.join(experiment_dir, 'louvain.json'), 'w') as outfile:
        json.dump(majors, outfile, indent=4)
def get_community(weight):
    """
    Cluster the graph to discover communities.

    The graph is built from the 'from_id' and 'to_id' columns of the
    module-level `graph_data`, using column `weight` as edge weight.

    weight: name of the variable (column) to use as edge weight

    Returns a DataFrame with an 'id' column listing the graph nodes and one
    '<weight>_label_<level>' column per dendrogram level with the community
    label of each node.
    """
    FG = nx.Graph()
    FG.add_weighted_edges_from(graph_data[['from_id', 'to_id', weight]].values)

    nodes = list(FG.nodes)
    result = pd.DataFrame({'id': nodes})
    print('node number: %s' % len(result))

    dendrogram = community.generate_dendrogram(FG)
    for level in range(len(dendrogram)):
        the_partition = community.partition_at_level(dendrogram, level)
        # Look labels up per node instead of relying on the partition dict
        # iterating in the same order as the graph nodes.
        result['%s_label_%s' % (weight, level)] = [
            the_partition[node] for node in nodes
        ]
    return result
def identify_clusters(graph, louvain_level=-1):
    """
    Identify clusters in the given NetworkX Graph by Louvain partitioning.

    louvain_level selects where the dendrogram is cut: 0 is the most granular
    partition and granularity decreases as louvain_level grows. Because the
    number of levels is not known in advance, negative values count down from
    the maximum: -1 means the maximum level, i.e. the largest clusters.
    Values outside the available range are clamped to the nearest valid
    level.
    """
    dendrogram = community.generate_dendrogram(graph)
    level_count = len(dendrogram)

    level = louvain_level
    if level < 0:
        level = max(0, level_count + level)
    if not level < level_count:
        # requested level is above the maximum: fall back to the last level
        level = level_count - 1

    return community.partition_at_level(dendrogram, level)
def _average_of_values(mapping):
    """Return the arithmetic mean of the values of `mapping`, or None if empty."""
    values = list(mapping.values())
    if not values:
        return None
    return float(sum(values)) / float(len(values))


def extract_network_metrics(mdg, ts, team=True):
    """Compute network metrics of the graph returned by ``extract_dpsg``.

    Every key of the returned dict is prefixed with 'full:' when `team` is
    true and with 'user:' otherwise. The metrics are node/edge counts,
    density, betweenness (unweighted and weighted by 'count' and 'effort'),
    in/out/total degree (unweighted and weighted by 'count' and 'effort'),
    each with its average, the Louvain partition and modularity of the
    undirected graph, and 'avg_distance'.

    Averages are None for a graph without nodes; 'louvain_modularity' and
    'avg_distance' are None when they cannot be computed.
    """
    met = {}
    dsg = extract_dpsg(mdg, ts, team)
    pre = 'full:' if team else 'user:'

    met[pre+'nodes_count'] = dsg.number_of_nodes()
    met[pre+'edges_count'] = dsg.number_of_edges()
    met[pre+'density'] = nx.density(dsg)

    # Per-node metrics, each stored together with its average. dict(...)
    # accepts both the plain dicts of networkx 1.x and the degree views of
    # networkx 2+.
    per_node_metrics = (
        ('betweenness', nx.betweenness_centrality(dsg)),
        ('betweenness_count', nx.betweenness_centrality(dsg, weight='count')),
        ('betweenness_effort', nx.betweenness_centrality(dsg, weight='effort')),
        ('in_degree', dsg.in_degree()),
        ('out_degree', dsg.out_degree()),
        ('degree', dsg.degree()),
        ('degree_count', dsg.degree(weight='count')),
        ('degree_effort', dsg.degree(weight='effort')),
    )
    for name, values in per_node_metrics:
        values = dict(values)
        met[pre+name] = values
        met[pre+'avg_'+name] = _average_of_values(values)

    usg = dsg.to_undirected()
    dendo = co.generate_dendrogram(usg)
    if isinstance(dendo, list) and len(dendo) > 0:
        partition = co.partition_at_level(dendo, len(dendo) - 1)
        met[pre+'partitions'] = dict(partition)
        met[pre+'louvain_modularity'] = co.modularity(partition, usg)
    else:
        met[pre+'louvain_modularity'] = None

    # Connected components as subgraphs; this form works on networkx 1.x
    # and 2+ (connected_component_subgraphs no longer exists in 2.4+).
    connected_components = (
        usg.subgraph(nodes) for nodes in nx.connected_components(usg)
    )
    shortest_paths = [nx.average_shortest_path_length(g)
                      for g in connected_components if g.size() > 1]
    # Largest average shortest path length among components with > 1 edge.
    met[pre+'avg_distance'] = max(shortest_paths) if shortest_paths else None

    return met
def louvain(G):
    """Print the last-level Louvain partition of G and its communities.

    The dendrogram is generated with the 'weight' edge attribute, resolution
    7 and randomize=True. First the node -> community dictionary is printed,
    then the community -> list-of-nodes mapping. Nothing is returned.
    """
    dendo = lvcm.generate_dendrogram(graph=G, weight='weight', resolution=7., randomize=True)
    last_level = len(dendo) - 1
    partition = lvcm.partition_at_level(dendo, last_level)
    print(partition)

    out = defaultdict(list)
    for node in partition:
        out[partition[node]].append(node)
    print(out)
def test_nodes_stay_together(self):
    """
    Check that nodes sharing a community at one level still share a
    community at the next higher level
    """
    graph = nx.erdos_renyi_graph(500, 0.01)
    dendo = co.generate_dendrogram(graph)

    parts = {
        level: co.partition_at_level(dendo, level)
        for level in range(len(dendo))
    }

    for level in range(len(dendo) - 1):
        lower = parts[level]
        higher = parts[level + 1]
        for com in set(lower.values()):
            higher_coms = set()
            for node, node_com in lower.items():
                if node_com == com:
                    higher_coms.add(higher[node])
            self.assertEqual(len(higher_coms), 1)
def louvain(adjacency_matrix):
    """
    Performs community embedding using the LOUVAIN method.

    Introduced in: Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
                   Fast unfolding of communities in large networks.
                   Journal of Statistical Mechanics: Theory and Experiment, 2008(10), P10008.

    Every level of the community hierarchy contributes a block of columns;
    a node has a 1 in the column of its community at each level.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    # Build the networkx undirected graph.
    graph = nx.from_scipy_sparse_matrix(adjacency_matrix, create_using=nx.Graph())

    # Run LOUVAIN to obtain the hierarchy of communities.
    tree = community.generate_dendogram(graph, part_init=None)

    # Embed communities: columns of successive levels are laid side by side.
    row = []
    col = []
    community_counter = 0
    for level in range(len(tree)):
        partition = community.partition_at_level(tree, level)
        row.extend(partition.keys())
        col.extend(community_counter + c for c in partition.values())
        community_counter += max(partition.values()) + 1

    row = np.array(row)
    col = np.array(col)
    data = np.ones(row.size, dtype=np.float64)

    number_of_nodes = len(partition)
    louvain_features = sparse.coo_matrix(
        (data, (row, col)),
        shape=(number_of_nodes, community_counter),
        dtype=np.float64)
    return louvain_features
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    """Compute Louvain communities of the graph given by an adjacency matrix.

    Args:
        adj_matrix: adjacency matrix passed to ``nx.from_numpy_array``.
        level: depth counted from the top of the dendrogram; 0 selects the
            last dendrogram level, ``len(dendro) - 1`` the first one.
        return_c_graph: when True, also return the adjacency matrix of the
            community-level (induced) graph.

    Returns:
        ``(community_list, c_level_graph, dendro, inv_dendro)`` where
        ``community_list`` holds one list of nodes per community, ordered by
        community id; ``c_level_graph`` is None unless ``return_c_graph`` is
        True; ``dendro`` is the dendrogram; and ``inv_dendro`` holds, per
        dendrogram level, a dict mapping each value of that level to the list
        of keys carrying it.

    Raises:
        Exception: if ``level`` is deeper than the dendrogram.
    """
    nx_G = nx.from_numpy_array(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False, random_state=0)  # Maybe set randomize True

    dendro_level = len(dendro) - level - 1
    if dendro_level < 0:
        raise Exception("The given Level is too deep. The maximum is: " + str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, dendro_level)

    # Group nodes per community id. The earlier code derived the number of
    # communities from a node id (max over keys) instead of from the
    # community ids, which could skip communities.
    members = {}
    for node, community_id in communities.items():
        members.setdefault(community_id, []).append(node)
    community_list = [members[community_id] for community_id in sorted(members)]

    c_level_graph = None
    if return_c_graph:
        community_level_G = louvain.induced_graph(communities, nx_G)
        c_level_graph = nx.adjacency_matrix(community_level_G)

    # Invert every dendrogram level: value -> list of keys.
    inv_dendro = []
    for dct in dendro:
        inv_dct = {}
        for k, v in dct.items():
            inv_dct.setdefault(v, []).append(k)
        inv_dendro.append(inv_dct)

    return community_list, c_level_graph, dendro, inv_dendro
def louvain(adjacency_matrix):
    """
    Performs community embedding using the LOUVAIN method.

    Introduced in: Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
                   Fast unfolding of communities in large networks.
                   Journal of Statistical Mechanics: Theory and Experiment, 2008(10), P10008.

    Each hierarchy level adds its own group of columns to the embedding; a
    node gets a 1 in the column of the community it belongs to at that level.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    # Undirected networkx graph of the adjacency matrix.
    undirected_graph = nx.from_scipy_sparse_matrix(adjacency_matrix, create_using=nx.Graph())

    # Hierarchy of communities from the LOUVAIN algorithm.
    tree = community.generate_dendogram(undirected_graph, part_init=None)

    # Embed communities, offsetting the columns of each level by the number
    # of columns already used by the previous levels.
    row_indices = []
    col_indices = []
    column_offset = 0
    for level, _ in enumerate(tree):
        partition = community.partition_at_level(tree, level)
        for node, node_community in partition.items():
            row_indices.append(node)
            col_indices.append(column_offset + node_community)
        column_offset += max(partition.values()) + 1

    row = np.array(row_indices)
    col = np.array(col_indices)
    data = np.ones(row.size, dtype=np.float64)

    embedding_shape = (len(partition), column_offset)
    return sparse.coo_matrix((data, (row, col)),
                             shape=embedding_shape,
                             dtype=np.float64)
def extract_louvain_modularity(g):
    """Return the Louvain partition and modularity of `g` without isolates.

    The computation runs on a copy of `g` from which isolated nodes were
    removed; `g` itself is not modified.

    Returns:
        dict with the keys 'partitions' (node -> community id; isolated nodes
        are absent) and 'modularity'. Both values are None when no dendrogram
        is available.
    """
    met = {}
    usg = g.copy()
    # Materialise the isolates first: on networkx 2+ nx.isolates is lazy and
    # removing nodes while it is being consumed modifies the graph during
    # iteration.
    isolated = list(nx.isolates(usg))
    usg.remove_nodes_from(isolated)

    dendo = co.generate_dendrogram(usg)
    if isinstance(dendo, list) and len(dendo) > 0:
        partition = co.partition_at_level(dendo, len(dendo) - 1)
        met['partitions'] = dict(partition)
        met['modularity'] = co.modularity(partition, usg)
    else:
        met['partitions'] = None
        met['modularity'] = None

    return met
def preprocess():
    """Partition the weight graph with Louvain and save every level.

    The graph is read from 'f_data/weight.mat' through ``genGraphFromFile``
    (the step that generates that file is not run here, so it presumably has
    to exist already). For each dendrogram level the number of communities is
    printed and the partition is saved with ``saveResult`` under
    "f_data/partition<level>".
    """
    G = genGraphFromFile('f_data/weight.mat')
    dendo = community.generate_dendrogram(G)

    filename = "f_data/partition"
    for level in range(len(dendo)):
        partition = community.partition_at_level(dendo, level)
        # Formatted into one string so the output is "size <n>" on both
        # Python 2 and Python 3.
        print('size %d' % len(set(partition.values())))
        saveResult(filename + str(level), partition)
def preprocess():
    """Partition the weight graph with Louvain and save every level.

    The graph is read from 'f_data/weight.mat' through ``genGraphFromFile``
    (the step that generates that file is not run here, so it presumably has
    to exist already). For each dendrogram level the number of communities is
    printed and the partition is saved with ``saveResult`` under
    "f_data/partition<level>".
    """
    G = genGraphFromFile('f_data/weight.mat')
    dendo = community.generate_dendrogram(G)

    filename = "f_data/partition"
    for level in range(len(dendo)):
        partition = community.partition_at_level(dendo, level)
        # Formatted into one string so the output is "size <n>" on both
        # Python 2 and Python 3.
        print('size %d' % len(set(partition.values())))
        saveResult(filename + str(level), partition)
def extract_louvain_modularity(g):
    """Return the Louvain partition and modularity of `g` without isolates.

    The computation runs on a copy of `g` from which isolated nodes were
    removed; `g` itself is not modified.

    Returns:
        dict with the keys 'partitions' (node -> community id; isolated nodes
        are absent) and 'modularity'. Both values are None when no dendrogram
        is available.
    """
    usg = g.copy()
    # Materialise the isolates first: on networkx 2+ nx.isolates is lazy and
    # removing nodes while it is being consumed modifies the graph during
    # iteration.
    isolated = list(nx.isolates(usg))
    usg.remove_nodes_from(isolated)

    dendo = co.generate_dendrogram(usg)
    if not (isinstance(dendo, list) and len(dendo) > 0):
        return {'partitions': None, 'modularity': None}

    partition = co.partition_at_level(dendo, len(dendo) - 1)
    met = {}
    met['partitions'] = dict(partition)
    met['modularity'] = co.modularity(partition, usg)
    return met
def gen_clusters(edges_file, resolution=dflt_resolution):
    """Print the Louvain clusters of a weighted edge-list file, level by level.

    For every dendrogram level the partition and its modularity are printed,
    followed by the sorted node list of each community and the group names
    produced by ``gids2names.generate_group_names`` for those nodes. Group
    names are loaded from "data/groups.txt".

    Args:
        edges_file: path of the weighted edge list to read.
        resolution: resolution handed to ``community.generate_dendrogram``
            (it used to be ignored in favour of a hard-coded 0.25).
    """
    with open(edges_file, "rb") as fp:
        G = nx.read_weighted_edgelist(fp)

    dendrogram = community.generate_dendrogram(G, resolution=resolution)
    len_d = len(dendrogram)
    print("{} items in dendrogram".format(len_d))

    gids2names.load_groups_file("data/groups.txt")

    for level in range(len_d):
        print()
        partition = community.partition_at_level(dendrogram, level)
        modularity = community.modularity(partition, G)
        print("partition at level {} is\n{}".format(level, pformat(partition)))
        print("modularity at level {} is {}".format(level, modularity))

        # Group nodes per community in one pass over the partition.
        members = {}
        for node, com in partition.items():
            members.setdefault(com, []).append(node)

        for com in set(partition.values()):
            list_nodes = sorted(members[com])
            print("nodes: {}".format(json.dumps(list_nodes)))
            print(" groups:")
            for gid, name in gids2names.generate_group_names(
                    group_ids_list=list_nodes):
                print(" {} {}".format(gid, name))
def predict(self):
    """Predict using community structure

    Two nodes belonging to the same community are predicted to form a link.
    The Louvain algorithm yields communities at several granularity levels;
    the finer grained the community shared by two nodes, the higher their
    score.

    You'll need to install Thomas Aynaud's python-louvain package from
    https://bitbucket.org/taynaud/python-louvain for this.

    """
    try:
        from community import generate_dendogram, partition_at_level
    except ImportError:
        raise ImportError("Module 'community' could not be found. "
                          "Please install python-louvain from "
                          "https://bitbucket.org/taynaud/python-louvain")
    from collections import defaultdict

    res = Scoresheet()
    dendogram = generate_dendogram(self.G)
    level_count = len(dendogram)

    for level in range(level_count):
        partition = partition_at_level(dendogram, level)
        # Lower level, smaller communities, larger weight
        weight = level_count - level

        communities = defaultdict(list)
        for node, com in six.iteritems(partition):
            communities[com].append(node)

        for members in six.itervalues(communities):
            for u, v in all_pairs(members):
                if self.eligible(u, v):
                    res[(u, v)] += weight
    return res
def predict(self):  # pylint:disable=E0202
    """Score node pairs by how finely they share a Louvain community.

    Two nodes in the same community are predicted to form a link. The
    Louvain dendrogram is walked level by level; each level at which a pair
    shares a community adds ``number of levels - level index`` to the pair's
    score, so finer-grained communities contribute more.

    This needs the python-louvain package. Install linkpred as follows:

       $ pip install linkpred[community]

    """
    try:
        import community
    except ImportError:
        raise ImportError("Module 'community' could not be found. "
                          "Please install linkpred as follows:\n"
                          "$ pip install linkpred[community]")

    scores = Scoresheet()
    levels = community.generate_dendrogram(self.G)
    depth = len(levels)

    for level in range(depth):
        membership = community.partition_at_level(levels, level)
        groups = defaultdict(list)
        bonus = depth - level  # lower level => smaller communities

        # Invert node -> community into community -> [nodes].
        for node, label in membership.items():
            groups[label].append(node)

        for members in groups.values():
            for u, v in all_pairs(members):
                if self.eligible(u, v):
                    scores[(u, v)] += bonus
    return scores
cluster2 = community.best_partition(graph)
# elapsed time of the second clustering run
runtimeC2 = timeit.default_timer() - startC2
mod2 = community.modularity(cluster2, graph)

graph = nx.read_edgelist("Data/a.data")
# start the timer for the third clustering run
startC3 = timeit.default_timer()
# 'generate_dendogram' is the old misspelled alias; use the proper name.
tmp = community.generate_dendrogram(graph)
cluster3 = community.partition_at_level(tmp, 0)
# elapsed time of the third clustering run
runtimeC3 = timeit.default_timer() - startC3
mod3 = community.modularity(cluster3, graph)

# Single pre-formatted argument: prints the same text under Python 2 and 3.
print("modularity: 1:%f; 2:%f; 3:%f" % (mod1, mod2, mod3))

nmi1 = calculate_NMI(cluster1, cluster2)
print("nmi between cluster1 and cluster 2: %.10f" % nmi1)
nmi2 = calculate_NMI(cluster1, cluster3)
print("nmi between cluster1 and cluster 3: %.10f" % nmi2)
nmi3 = calculate_NMI(cluster2, cluster3)
print("nmi between cluster2 and cluster 3: %.10f" % nmi3)
def build_json(hierarchy_dict, h5_data, dataset_name, graph, json, threshold):
    """Add the community hierarchy of one graph to ``json`` and return it.

    For every integer key of ``hierarchy_dict`` a "hierarchy<N>" entry with
    "nodes" and "edges" dicts is built; one extra level (highest index + 1)
    holds the single, un-merged nodes of ``graph``. The result is stored at
    ``json["graphs"]["graph<graph_idx>"]``.

    Args:
        hierarchy_dict: dict holding "dendro" (list of node -> community
            dicts, single nodes first), "inv_dendro" (list of
            community -> child nodes dicts), "graph_idx", and one entry per
            integer hierarchy index whose value has a "communities" mapping.
            It is not modified.
        h5_data: object whose ``columns`` can be indexed to obtain the
            m/z values of a node or community.
        dataset_name: stored under "dataset".
        graph: graph object providing ``edges(data=True)``.
        json: dict with a "graphs" dict; updated in place.
        threshold: stored under "threshold".

    Returns:
        The ``json`` dict that was passed in.
    """
    g_dict = {}

    # Maximum hierarchy size
    hmax = len(hierarchy_dict["dendro"]) - 1

    # A pseudo level (hmax + 1) triggers the single-node dict creation. It is
    # kept in a local list so the caller's hierarchy_dict is left untouched.
    levels = [(hidx, hdict) for hidx, hdict in hierarchy_dict.items()
              if isinstance(hidx, int) and hidx != hmax + 1]
    levels.append((hmax + 1, {}))

    for hidx, hdict in levels:
        # Dendrogram list is sorted inversely to hierarchy dict, so the
        # dendrogram index has to be recalculated.
        didx = hmax - hidx
        e_dict = {}
        n_dict = {}

        # Nodes
        if didx > -1:
            for com, nodes in hierarchy_dict["inv_dendro"][didx].items():
                node_name = "h%in%i" % (hidx, com)
                a_dict = {
                    "index": com,
                    "name": node_name,
                    "childs": nodes,
                    "mzs": list(h5_data.columns[hdict["communities"][com]]),
                }
                try:
                    a_dict["membership"] = hierarchy_dict["dendro"][didx + 1][com]
                except LookupError as e:
                    # No coarser dendrogram level for this community.
                    print(e)
                n_dict[node_name] = a_dict
        else:
            # single nodes are always first entry in dendro
            for node, com in hierarchy_dict["dendro"][0].items():
                n_dict["h%in%i" % (hidx, node)] = {
                    "index": node,
                    "name": h5_data.columns[node],
                    "membership": com,
                    "mzs": [h5_data.columns[node]],
                }

        # Edges
        if didx > -1:
            community = louvain.partition_at_level(hierarchy_dict["dendro"], didx)
            edges = louvain.induced_graph(community, graph).edges(data=True)
        else:
            edges = graph.edges(data=True)

        idx = 0
        for source, target, attrs in edges:
            # Self loops (source == target) carry the inner edge weight of a
            # community and are not exported as edges.
            if source == target:
                continue
            edge_name = "h%ie%i" % (hidx, idx)
            # Edges without a "count" attribute are treated as a single edge.
            count = attrs.get("count", 1)
            e_dict[edge_name] = {
                "index": idx,
                "name": edge_name,
                "source": "h%in%i" % (hidx, source),
                "target": "h%in%i" % (hidx, target),
                "weight": attrs["weight"] / count,
            }
            idx += 1

        g_dict["hierarchy%i" % (hidx)] = {"nodes": n_dict, "edges": e_dict}

    ds_dict = {
        "graph": g_dict,
        "dataset": dataset_name,
        "threshold": threshold,
    }

    # Map every m/z value to the node that contains it on each level.
    mzs_dict = {}
    for mz in list(h5_data.columns):
        per_level = mzs_dict[str(mz)] = {}
        for hy, vals in g_dict.items():
            for nid, props in vals["nodes"].items():
                try:
                    node_mzs = props["mzs"]
                except KeyError as e:
                    # Node without "mzs": fall back to comparing its name.
                    print(e)
                    if mz == props["name"]:
                        per_level[hy] = nid
                    continue
                if mz in node_mzs:
                    per_level[hy] = nid
                    break
    ds_dict["mzs"] = mzs_dict

    json["graphs"]["graph%i" % (hierarchy_dict["graph_idx"])] = ds_dict
    return json
def print_dendrogram(self):
    """Print the partition of ``self.G`` at each dendrogram level.

    Levels ``0`` to ``len(dendrogram) - 2`` are printed; the last (coarsest)
    level is not, exactly as before.
    """
    # 'generate_dendogram' is the old misspelled alias; use the proper name.
    dendo = community.generate_dendrogram(self.G)
    for level in range(len(dendo) - 1):
        # One pre-formatted argument gives identical output on Python 2 and 3.
        print("partition at level %s is %s"
              % (level, community.partition_at_level(dendo, level)))
doResolution = float(sys.argv[2])
sys.stderr.write("Using resolution " + str(doResolution) + ".\n")

# read data from edges input file as an int-weighted graph.
# `data` must be a sequence of (label, type) pairs; the previous
# (('weight',int)) was just the flat tuple ('weight', int), which cannot be
# matched against a weight column.
G = networkx.read_edgelist(inputFile, nodetype=int, data=(('weight', int),))
# G = networkx.read_edgelist(inputFile, nodetype=int) # read as unweighted
sys.stderr.write("Done reading.\n")

# do community detection and get dendrograph of communities
dendo = community.generate_dendrogram(G, part_init=None, resolution=doResolution, weight='weight')

# store communities at different levels
parts = {}
for level in range(0, len(dendo)):
    parts[level] = community.partition_at_level(dendo, level)
levels = len(dendo)

# just do plain community detection instead of nested variant
#levels = 1
#parts[0] = community.best_partition(G) # find communities

# output header to stdout
sys.stdout.write("Id")
communitySize = {}
for level in range(0, levels):
    sys.stdout.write("\tCommunity_Res" + str(doResolution) + "_Level" + str(level+1))
    communitySize[level] = -1
sys.stdout.write("\n")

# output nodelist with communities to stdout
G.add_edge(i, j, weight=w_ij)

nx.draw_spring(G)
dst = os.path.join(out_dir, 'CC-Network(ccthr=%d, thr=%d, ref_journal_flag=%s).png' % (ccthr, thr, ref_journal_flag))
plt.savefig(dst)
plt.close('all')

#...
if verbose: print("....computing communities with Louvain algo")
# 'generate_dendogram' is the old misspelled alias; use the proper name.
dendogram = community.generate_dendrogram(G, part_init=None)

#... output infos
# Each print gets one pre-formatted argument, so the output is the same on
# Python 2 and Python 3.
print("....There are %d references in the database (contain duplicates)" % (nb_total_refs))
print("....There are %d references in the database (contain no duplicate)" % (nb_refs))
print("....There are %d references in the CC network\n......(ie sharing at least %d article(s) with another reference)" % (len(G.nodes()), ccthr))
for level in range(len(dendogram)):
    part = community.partition_at_level(dendogram, level)
    mod = community.modularity(part, G)
    # Count community sizes in one pass over the partition instead of
    # rescanning every node for every community.
    sizes = {}
    for com in part.values():
        sizes[com] = sizes.get(com, 0) + 1
    nb_comm = len(sizes)
    size_sup10 = sum(1 for size in sizes.values() if size > 10)
    size_sup100 = sum(1 for size in sizes.values() if size > 100)
    #communities_caracteristics(partition, thr, level)
    print("....level %d: %d communities [%d with size > 10, %d with size > 100], modularity Q=%1.6f" % (level, nb_comm, size_sup10, size_sup100, mod))

##############################
## WHICH EXTRACTION ?
print("..CC communities extraction")
#
confirm = 'n'; level = len(dendogram) - 1; thr = 10
k4_mod = modularity(k4_cores, graph)
kmax_mod = modularity(kmax_cores, graph)
# One pre-formatted argument per print: same output on Python 2 and 3.
print('k4 mod %s' % (k4_mod,))
print('kmax mod %s' % (kmax_mod,))

k4_wcc = wcc1(k4_cores[0], graph)
kmax_wcc = wcc1(kmax_cores[0], graph)
print('k4 wcc %s' % (k4_wcc,))
print('kmax wcc %s' % (kmax_wcc,))

# For every dendrogram level record [number of communities, modularity].
dendro = comm.generate_dendrogram(graph)
louvain_steps = []
for level in range(len(dendro)):
    partition = comm.partition_at_level(dendro, level)
    # community id -> nodes, filled in sorted node order
    clusters = {}
    for key, value in sorted(partition.items()):
        clusters.setdefault(value, []).append(key)
    # Every cluster has at least one member, so no emptiness check is needed.
    communities = [graph.subgraph(members) for members in clusters.values()]
    louvain_steps.append([len(clusters), modularity(communities, graph)])

result = {
    'max_core': max_k,
    'num_4-cores': len(k4_cores),
    'modularity_max-cores': kmax_mod,
    'modularity_4-cores': k4_mod,
    "wcc_max-cores": kmax_wcc,
    "wcc_4-cores": k4_wcc,
    'louvain_steps': louvain_steps
}
db.bgroups.update({ '_id': gid }, { '$set': result }, upsert = False, multi = False)
k4_mod = modularity(k4_cores, graph) kmax_mod = modularity(kmax_cores, graph) print 'k4 mod', k4_mod print 'kmax mod', kmax_mod k4_wcc = wcc1(k4_cores[0], graph) kmax_wcc = wcc1(kmax_cores[0], graph) print 'k4 wcc', k4_wcc print 'kmax wcc', kmax_wcc dendro = comm.generate_dendrogram(graph) louvain_steps = [] for level in range(len(dendro)): partition = comm.partition_at_level(dendro, level) clusters = {} for key, value in sorted(partition.iteritems()): clusters.setdefault(value, []).append(key) communities = [] for key, value in clusters.iteritems(): if len(value) > 0: communities.append(graph.subgraph(value)) louvain_steps.append( [len(set(partition.values())), modularity(communities, graph)]) result = {
import json
from util.read_utils import lines_per_n
import community
import networkx as nx

# Build a directed sender -> recipient graph from the mail headers:
# solid edges for "To" recipients, dashed edges for "Cc" recipients.
author_graph = nx.DiGraph()
with open('clean_data.json', 'r') as jfile:
    for chunk in lines_per_n(jfile, 9):
        hdr_data = json.loads(chunk)
        for to_addr in str(hdr_data['To']).split(","):
            if '@' in to_addr:
                author_graph.add_edge(str(hdr_data['From']), to_addr.strip(), style='solid', label=hdr_data['Time'])
        for cc_addr in str(hdr_data['Cc']).split(","):
            # Validate the Cc address itself; this used to test the leftover
            # `to_addr` from the loop above.
            if '@' in cc_addr:
                author_graph.add_edge(str(hdr_data['From']), cc_addr.strip(), style='dashed', label=hdr_data['Time'])
# The with-statement has already closed the file.

print("No. of Weakly Connected Components:", nx.number_weakly_connected_components(author_graph))
print("No. of Strongly Connected Components:", nx.number_strongly_connected_components(author_graph))
print("Nodes:", nx.number_of_nodes(author_graph))
print("Edges:", nx.number_of_edges(author_graph))

# The following lines of code generate a dendrogram for the above graph.
# 'generate_dendogram' is the old misspelled alias; use the proper name.
dendo = community.generate_dendrogram(author_graph.to_undirected())
for level in range(len(dendo)):
    print("Partition at level", level, "is", community.partition_at_level(dendo, level))
    print("-"*10)