def communityMining(G, minCommSize=10):
    """
    Find communities in the graph 'G' with more than 'minCommSize' nodes.

    The graph is partitioned twice with the Louvain method. Communities of
    the finest level (level 0) holding fewer than 'minCommSize' nodes are
    pruned first; the remaining graph is then partitioned again and every
    community with more than 'minCommSize' nodes is reported.

    Note: 'G' is modified in place (nodes of sparse communities are removed)
    and every remaining node must carry a 'bipartite' attribute; only nodes
    whose attribute equals 1 or 0 are placed in the reported subgraphs.

    Returns a list of subgraphs of 'G', one per reported community.
    """
    dendrogram = community.generate_dendrogram(G)
    firstPartition = community.partition_at_level(dendrogram, 0)

    sys.stderr.write("Prune sparse clusters. ")
    # Remove early small communities.
    firstSizes = Counter(firstPartition.values())
    sparseComm = set(com for com, size in firstSizes.items()
                     if size < minCommSize)
    sparseNodes = [node for node in G.nodes()
                   if firstPartition[node] in sparseComm]
    G.remove_nodes_from(sparseNodes)

    sys.stderr.write("Find communities. ")
    # Partition the pruned graph again and report big communities.
    dendrogram = community.generate_dendrogram(G)
    # NOTE(review): this is -1 for a single-level dendrogram -- confirm that
    # partition_at_level tolerates it.
    partition = community.partition_at_level(dendrogram, len(dendrogram) - 2)

    # Group the nodes of both bipartite sets by community in a single pass.
    members = {}
    for node, data in G.nodes(data=True):
        if data['bipartite'] in (0, 1):
            members.setdefault(partition[node], []).append(node)

    communities = []
    for com, size in Counter(partition.values()).items():
        if size <= minCommSize:
            continue
        comm = G.subgraph(members.get(com, []))
        if comm.order() < minCommSize:
            sys.stderr.write(
                "Remove small community (This shouldn't happen here?)\n")
            continue
        communities.append(comm)
    return communities
def _graph_community(G):  # unused function
    '''The 'graph_community' function analyses a corpus at two levels: the
    Louvain dendrogram of the corpus coupling graph G, plus one extra level in
    which every top-level community larger than SIZECUT is split once more.

    NOTE(review): a single extra split does not by itself guarantee that every
    resulting community has at most SIZECUT nodes.

    Author: Sebastian Grauwin (http://sebastian-grauwin.com/bibliomaps/)

    Args:
        G (networkx object): corpus coupling graph.

    Returns:
        louvain_partition (dict): maps each dendrogram level to its partition
            (top-level labels shifted to start from 1); the extra key
            `len(dendrogram)` holds the partition with large communities
            split, where sub-community labels are
            `(community_id + 1) * 1000 + sub_label`.
    '''
    # 3rd party import
    import community as community_louvain

    # TO DO: move SIZECUT in COUPL_GLOBAL_VALUES if _graph_community is used
    SIZECUT = 10  # Upper limit of size communities

    dendrogram, part, _ = _runpythonlouvain(G)
    part2 = part.copy()

    # Group nodes by top-level community in one pass.
    nodes_by_community = {}
    for node, community_id in part.items():
        nodes_by_community.setdefault(community_id, []).append(node)

    for community_id, list_nodes in nodes_by_community.items():
        if len(list_nodes) > SIZECUT:
            # Split clusters of size > SIZECUT with a single Louvain run on
            # the induced subgraph.
            H = G.subgraph(list_nodes).copy()
            dendo2 = community_louvain.generate_dendrogram(H, part_init=None)
            partfoo = community_louvain.partition_at_level(
                dendo2, len(dendo2) - 1)
            # Add prefix code so sub-community labels stay unique.
            prefix = (community_id + 1) * 1000
            part2.update(
                {node: prefix + sub_id for node, sub_id in partfoo.items()})
        else:
            # For communities of at most SIZECUT nodes, shift the label too.
            for node in list_nodes:
                part2[node] += 1

    # ... save partitions
    top_level = len(dendrogram) - 1
    louvain_partition = dict()
    for lev in range(len(dendrogram)):
        louvain_partition[lev] = community_louvain.partition_at_level(
            dendrogram, lev)
    # .. set community labels starting from 1 instead of 0 for top level
    for k in louvain_partition[top_level].keys():
        louvain_partition[top_level][k] += 1
    louvain_partition[len(dendrogram)] = part2
    return louvain_partition
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    """Run Louvain on an adjacency matrix and list the communities.

    Args:
        adj_matrix: adjacency matrix handed to NetworkX to build the graph.
        level: how many levels below the top of the dendrogram to cut
            (0 is the top level).
        return_c_graph: when true, also return the adjacency matrix of the
            community-level (induced) graph.

    Returns:
        (community_list, c_level_graph): community_list holds one list of
        nodes per community, ordered by community id; c_level_graph is the
        induced graph's adjacency matrix or None.

    Raises:
        Exception: if `level` is deeper than the dendrogram.
    """
    # from_numpy_matrix was removed in NetworkX 3.0; fall back to its
    # replacement when it is not available.
    build_graph = getattr(nx, "from_numpy_matrix", None) or nx.from_numpy_array
    nx_G = build_graph(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False)  # Maybe set randomize True

    depth = len(dendro) - level - 1
    if depth < 0:
        raise Exception("The given Level is too deep. The maximum is: " +
                        str(len(dendro) - 1))
    communities = louvain.partition_at_level(dendro, depth)

    # Group nodes by community id. The previous code derived the number of
    # communities from the *node* holding the largest id, not from the id.
    grouped = {}
    for node, com in communities.items():
        grouped.setdefault(com, []).append(node)
    community_list = [grouped[com] for com in sorted(grouped)]

    c_level_graph = None
    if return_c_graph:
        community_level_G = louvain.induced_graph(communities, nx_G)
        c_level_graph = nx.adjacency_matrix(community_level_G)
    return community_list, c_level_graph
def prepare_communities(self):
    """Compute the Louvain dendrogram of `self.g` into `self.dendrogram`.

    `community.generate_dendrogram` is used when the module provides it;
    otherwise the alternative spelling `generate_dendogram` is called.
    """
    try:
        generate = community.generate_dendrogram
    except AttributeError:
        # presumably an older release exposing the misspelled name -- verify
        generate = community.generate_dendogram
    self.dendrogram = generate(self.g)
def external_ec_coarsening(graph, sfdp_path, coarsening_scheme=2, c_type='original'):
    """Coarsen `graph` either with Louvain or with the original scheme.

    Args:
        graph: input graph (passed to `magicgraph` / `original_coarsening`).
        sfdp_path: forwarded to `original_coarsening`.
        coarsening_scheme: forwarded to `original_coarsening`.
        c_type: 'louvain' or 'original'.

    Returns:
        For 'louvain': (coarse_graphs, merges), where coarse_graphs starts
        with the input graph followed by one graph per dendrogram level and
        merges holds the node -> community partition of each level.
        For 'original': whatever `original_coarsening` returns.
        For any other `c_type`: None.

    Side effects:
        The Louvain branch writes one `induced<level>.edgelist` file per
        level into the current working directory and reads it back.
    """
    if c_type == 'original':
        return original_coarsening(graph, sfdp_path, coarsening_scheme)
    if c_type != 'louvain':
        return None

    print("Coarsening with Louvain")
    matrix = magicgraph.to_adjacency_matrix(graph)
    # from_scipy_sparse_matrix was removed in NetworkX 3.0; fall back to its
    # replacement when it is not available.
    from_sparse = (getattr(nx, 'from_scipy_sparse_matrix', None)
                   or nx.from_scipy_sparse_array)
    nx_graph = from_sparse(matrix)
    dendro = community.generate_dendrogram(nx_graph)

    coarse_graphs = [DoubleWeightedDiGraph(graph)]
    merges = []
    for l in range(len(dendro)):
        level = community.partition_at_level(dendro, l)
        induced = community.induced_graph(level, nx_graph)
        filename = 'induced' + str(l) + '.edgelist'
        # Write the weighted graph to file; the context manager closes the
        # handle even if a write fails.
        with open(filename, 'w') as f:
            for u, v, a in induced.edges.data('weight', default=1):
                f.write(' '.join([str(u), str(v), str(a)]) + '\n')
        m_graph = magicgraph.load_weighted_edgelist(filename, undirected=True)
        coarse_graphs.append(DoubleWeightedDiGraph(m_graph))
        merges.append(level)
        print('Level: ', l, 'N nodes: ', m_graph.number_of_nodes())
    return coarse_graphs, merges
def apply_community_louvain(G):
    """List the top-level Louvain communities of `G`, skipping those that
    contain the start or the end node.

    Each kept community is printed and returned as a list of nodes.
    """
    start_node_id, end_node_id = get_start_and_end_nodes(G)
    dendo = community_louvain.generate_dendrogram(G)
    highest_partition = community_louvain.partition_at_level(
        dendo, len(dendo) - 1)

    # Group nodes by community in a single pass.
    members = {}
    for node, community_number in highest_partition.items():
        members.setdefault(community_number, []).append(node)

    print("Communities;")
    list_of_communities = []
    for community_number in set(highest_partition.values()):
        community_items = members[community_number]
        if start_node_id in community_items or end_node_id in community_items:
            continue
        list_of_communities.append(community_items)
        print(f"Community number {len(list_of_communities)}: {community_items}")
    return list_of_communities
def louvianClustering(self, similarity_measure_list):
    """Cluster items with Louvain on a weighted similarity graph.

    Args:
        similarity_measure_list: iterable of (f1, f2, val) triples; an edge
            f1 -- f2 with weight float(val) is added when the weight is
            above the threshold (currently 0).

    Returns:
        A list of clusters, each a list of nodes; also stored in
        `self.cluster_set`.
    """
    thresh = 0  # self.getThreshold(similarity_measure_list)
    edge_list = []
    node_list = []
    for f1, f2, val in similarity_measure_list:
        weight = float(val)
        if weight > thresh:
            edge_list.append((f1, f2, weight))
            node_list.append(f1)
            node_list.append(f2)
    node_list = list(set(node_list))

    G = nx.Graph()
    G.add_nodes_from(node_list)
    G.add_weighted_edges_from(edge_list)
    partition = community.best_partition(G)

    # Group nodes by cluster id in a single pass.
    members = {}
    for node, cluster in partition.items():
        members.setdefault(cluster, []).append(node)
    cluster_set_elements = [members[cluster_id]
                            for cluster_id in set(partition.values())]

    self.cluster_set = cluster_set_elements
    return cluster_set_elements
def generateDendogram(self):
    """Build the dendrogram of `self.g` and, for every level, an
    OrderedDict of its (key, value) items sorted by value in `self.Order`.
    """
    def by_value(item):
        return item[1]

    self.Order = []
    self.dendogram = cm.generate_dendrogram(self.g)
    self.Order.extend(
        OrderedDict(sorted(level_map.items(), key=by_value))
        for level_map in self.dendogram
    )
def louvain_community_detection(networkx_graph):
    """
    Do louvain community detection and return the partition found at
    level 0 of the dendrogram.

    :param networkx_graph: graph to partition (edge attribute 'weight' is
        used as weight)
    :return: the partition returned by `cm.partition_at_level` for level 0
    """
    dendrogram = cm.generate_dendrogram(
        networkx_graph, randomize=True, weight='weight')
    first_level = 0
    return cm.partition_at_level(dendrogram, first_level)
def test_modularity_increase(self):
    """
    Generate a dendrogram and test that modularity is always increasing
    """
    graph = nx.erdos_renyi_graph(1000, 0.01)
    dendo = co.generate_dendrogram(graph)
    mods = []
    for level in range(len(dendo)):
        partition = co.partition_at_level(dendo, level)
        mods.append(co.modularity(partition, graph))
    # Already sorted means modularity never decreases from level to level.
    self.assertListEqual(mods, sorted(mods))
def _runpythonlouvain(G):  # unused function
    '''The "_runpythonlouvain" function runs the Louvain algorithm NRUNS times
    on the corpus coupling graph G and keeps the run whose top-level
    partition (level "len(foo_dendrogram) - 1") has the highest modularity
    (see https://buildmedia.readthedocs.org/media/pdf/python-louvain/latest/python-louvain.pdf).

    Author: Sebastian Grauwin (http://sebastian-grauwin.com/bibliomaps/)

    Args:
        G (networkx object): corpus coupling graph.

    Returns:
        results (namedtuple): (dendrogram, partition, modularity) where
            dendrogram [list of dict]: a list of partitions, ie dictionaries
                where keys of the i+1 dict are the values of the i dict;
            partition (dict): Louvain partition of the corpus coupling graph G
                where dict keys are the pub IDs and the dict values are the
                community IDs;
            modularity [float]: modularity of the returned partition.
    '''
    # standard library imports
    from collections import namedtuple

    # 3rd party import
    import community as community_louvain

    # TO DO: move NRUNS in COUPL_GLOBAL_VALUES if _runpythonlouvain is used.
    NRUNS = 1  # number of time the louvain algorithm is run for a given
    # network, the best partition being kept.

    named_tup_results = namedtuple('results',
                                   ['dendrogram', 'partition', 'modularity'])

    max_modularity = -1
    for run in range(NRUNS):
        if NRUNS > 1:
            print(f'......run {run + 1}/{NRUNS}')
        foo_dendrogram = community_louvain.generate_dendrogram(
            G, part_init=None)
        partition_foo = community_louvain.partition_at_level(
            foo_dendrogram, len(foo_dendrogram) - 1)
        modularity = community_louvain.modularity(partition_foo, G)
        if modularity > max_modularity:
            max_modularity = modularity
            partition = partition_foo.copy()
            dendrogram = foo_dendrogram.copy()

    # Report the modularity of the run that was kept, not of the last run.
    return named_tup_results(dendrogram, partition, max_modularity)
def dend(file):
    """Build the graph for the sequences in `file`, compute its dendrogram
    using the 'len' edge attribute as weight, print every level and return
    (dendrogram, seqnames).
    """
    seqs, seqnames = getseqs(file)
    allDistTuple, graph = make_allDistTuple_fast(seqs)
    graph = kstep(allDistTuple, graph)
    dendrogram = community.generate_dendrogram(graph, weight='len')
    for level_partition in dendrogram:
        print(level_partition)
    return dendrogram, seqnames
def busmap_by_louvain(network, level=-1):
    """Map every bus of `network` to a Louvain community.

    Lines become edges weighted by 1 / x. A negative `level` counts back
    from the number of dendrogram levels (-1 is the last level).

    Returns a pandas Series indexed by `network.buses.index`.
    """
    inverse_x = 1. / network.lines.x
    endpoints = network.lines.loc[:, ['bus0', 'bus1']]
    lines = endpoints.assign(weight=inverse_x).set_index(['bus0', 'bus1'])

    G = nx.Graph()
    G.add_nodes_from(network.buses.index)
    G.add_edges_from(
        (bus0, bus1, dict(weight=w)) for (bus0, bus1), w in lines.itertuples()
    )

    dendrogram = community.generate_dendrogram(G)
    if level < 0:
        level = level + len(dendrogram)
    partition = community.partition_at_level(dendrogram, level=level)
    return pd.Series(partition, index=network.buses.index)
def test_modularity_increase(self):
    """
    Generate a dendrogram and test that modularity is always increasing
    """
    graph = nx.erdos_renyi_graph(1000, 0.01)
    dendo = co.generate_dendrogram(graph)
    partitions = (co.partition_at_level(dendo, lvl)
                  for lvl in range(len(dendo)))
    mods = [co.modularity(part, graph) for part in partitions]
    expected = sorted(mods)
    self.assertListEqual(mods, expected)
def comunity_detection_function(datafile, G, show=True, db_name='facebook_combined.csv'):
    '''
    Detect communities of G with Louvain and optionally draw them.

    requirement: networkx, matplotlib, pandas,
    community: https://bitbucket.org/taynaud/python-louvain

    Args:
        datafile: not used by this function.
        G: graph to partition.
        show: when true, draw every community in its own colour, then the
            edges, and show the figure.
        db_name: not used by this function.

    Returns:
        The dendrogram from `community.generate_dendrogram(G)`; this is a
        separate run from the partition that is drawn.
    '''
    # First compute the best partition.
    partition = community.best_partition(G)
    print("community detection finished!")

    if show:
        # Drawing nodes, one colour per community.
        print("drawing nodes of communites...")
        nodes_by_community = {}
        for node, com in partition.items():
            nodes_by_community.setdefault(com, []).append(node)
        pos = nx.spring_layout(G)
        # Colours are named 'C1', 'C2', ... (numbering starts at 1).
        for counter, com in enumerate(set(partition.values()), start=1):
            nx.draw_networkx_nodes(G,
                                   pos,
                                   nodes_by_community[com],
                                   node_size=10,
                                   node_color='C' + str(counter))
        print("drawing community detection finished!")

        # Drawing edges.
        print("drawing network edges...")
        nx.draw_networkx_edges(G, pos)
        plt.show()
    return community.generate_dendrogram(G)
def dendo_community(x):
    """Reorder the square matrix `x` so that rows/columns of the same
    level-0 Louvain community are adjacent.

    NOTE(review): assumes the node order of the graph built by
    `corr_matrix2graph` matches the row order of `x` -- confirm.

    Returns:
        (sorted_x, sort_index): the reordered matrix and the permutation
        that was applied to both axes.
    """
    import community

    G = corr_matrix2graph(x)
    dendo = community.generate_dendrogram(G)
    # Community label of every node at the finest level, in dict order.
    labels = np.array(list(dendo[0].values()))
    sort_index = np.argsort(labels)
    sorted_x = x[sort_index, :]
    sorted_x = sorted_x[:, sort_index]
    return sorted_x, sort_index
def update(self, inputs, adj_mat=None):
    """Rebuild the graph, its Louvain dendrogram, the labels and centroids.

    The graph is built from `adj_mat`, the dendrogram is recomputed, the
    labels are refreshed through `self.updateLabels(self.level)` and the
    centroids are recomputed from `inputs` and `self.labels`.

    Args:
        inputs: data handed to `computeCentroids`.
        adj_mat: adjacency matrix of the graph; required despite the
            `None` default.

    Raises:
        ValueError: if `adj_mat` is None.
    """
    if adj_mat is None:
        raise ValueError("adj_mat is required to build the graph")
    # from_numpy_matrix was removed in NetworkX 3.0; fall back to its
    # replacement when it is not available.
    build_graph = getattr(nx, "from_numpy_matrix", None) or nx.from_numpy_array
    self.graph = build_graph(adj_mat)
    self.dendrogram = community_louvain.generate_dendrogram(self.graph)
    self.updateLabels(self.level)
    self.centroids = computeCentroids(inputs, self.labels)
def louvain(graph):
    """
    Louvain clustering.

    Returns a dictionary keyed by clustering level whose values are the
    clusterings as returned by the to_clusters_dict method. Levels
    0 .. len(dendrogram) - 2 are included (the last level is not).
    """
    # Overrides the `__MIN` attribute of the `community` module
    # (presumably its convergence threshold -- verify).
    community.__MIN = 1e-12
    dendo = community.generate_dendrogram(graph)
    # partition_at_level yields a dict: node -> community it belongs to.
    return {
        level: to_clusters_dict(community.partition_at_level(dendo, level))
        for level in range(len(dendo) - 1)
    }
def louvain(graph):
    """
    Run Louvain clustering on `graph`.

    The result maps each clustering level to the clustering produced by
    to_clusters_dict for that level; every level except the last one of
    the dendrogram is reported.
    """
    # Overrides the `__MIN` attribute of the `community` module
    # (presumably its convergence threshold -- verify).
    community.__MIN = 1e-12
    dendo = community.generate_dendrogram(graph)
    multilevel = {}
    for level, _ in enumerate(dendo[:-1]):
        # node -> community mapping for this level
        node_to_community = community.partition_at_level(dendo, level)
        multilevel[level] = to_clusters_dict(node_to_community)
    return multilevel
def modularize(edgeGraph, nodeDf, nameOfModularityColumn=u'Community_Lvl_0'):
    '''
    Use the Louvain algorithm to assign a community to every node of a graph.

    The community of the last dendrogram level is written into the column
    `nameOfModularityColumn` of `nodeDf` (looked up through the 'Id'
    column). Returns the cleaned node data frame and the dendrogram.
    '''
    dendrogram = community.generate_dendrogram(edgeGraph, weight='weight')
    topLevel = len(dendrogram) - 1
    dendroBestPartitionDict = community.partition_at_level(dendrogram, topLevel)

    # Make sure the target column exists before filling it.
    columnMissing = nameOfModularityColumn not in nodeDf.columns
    if columnMissing:
        nodeDf[nameOfModularityColumn] = np.nan

    communityOfNode = nodeDf[u'Id'].map(dendroBestPartitionDict)
    nodeDf[nameOfModularityColumn] = communityOfNode

    # nodeDfCleaner takes care of the rows left without a community.
    cleanedDf = nodeDfCleaner(nodeDf)
    return cleanedDf, dendrogram
def infos_per_level(graph, labels, params, verbose=True):
    """Compute the community entropy info for every dendrogram level.

    Each node starts in its own colour; at every level the colour is
    replaced by the community the dendrogram assigns to it. `params` is
    not used. Returns one info object per level.
    """
    dendrogram = louvain.generate_dendrogram(graph)
    node_colour = {node: node for node in graph}
    infos = []
    for level_map in dendrogram:
        node_colour = {node: level_map[current]
                       for node, current in node_colour.items()}
        infos.append(
            compute_communities_entropy(graph.nodes,
                                        node_colour,
                                        labels,
                                        verbose=verbose))
    return infos
def run_louvain(experiment_dir):
    """Print the level-0 Louvain communities of the projection graph.

    The graph is read from `<experiment_dir>/projection.txt` as a directed
    edge list with 'weight', 'p_prereq' and 'p_course' float attributes and
    converted to an undirected graph before clustering. Every community is
    printed as a list of node ids, preceded by a separator line.
    """
    g = nx.read_edgelist(os.path.join(experiment_dir, 'projection.txt'),
                         create_using=nx.DiGraph,
                         data=[('weight', float), ('p_prereq', float),
                               ('p_course', float)])
    g = g.to_undirected()
    d = community.generate_dendrogram(g)
    level_0 = community.partition_at_level(d, 0)

    # Group node ids by community in a single pass.
    majors = {}
    for class_id, partition_num in level_0.items():
        majors.setdefault(partition_num, []).append(class_id)

    # Iterate over every community id; range(max(...)) skipped the last one.
    for partition_num in sorted(majors):
        print('=' * 40)
        print(majors[partition_num])
    print('=' * 40)
def test_nodes_stay_together(self):
    """
    Test that two nodes in the same community at one level stay in the
    same at higher level
    """
    g = nx.erdos_renyi_graph(500, 0.01)
    dendo = co.generate_dendrogram(g)
    parts = [co.partition_at_level(dendo, level)
             for level in range(len(dendo))]
    # Compare every level with the one directly above it.
    for lower, higher in zip(parts, parts[1:]):
        for com in set(lower.values()):
            comhigher = {higher[node]
                         for node, comnode in lower.items()
                         if comnode == com}
            self.assertEqual(len(comhigher), 1)
def make_partitions(name='projection_graph.pickle'):
    """
    Find the communities in the network.

    The dendrogram is also pickled (protocol 2) to
    'partition_dendogram.pickle' in the current directory.

    :param name: name of graph pickle file
    :return: tuple (graph, partition dendrogram)
    """
    G = read(name)
    print("Generating Partition Dendogram")
    partition_dendogram = community.generate_dendrogram(G)
    with open('partition_dendogram.pickle', 'wb') as pickle_file:
        pickle.dump(partition_dendogram, pickle_file, protocol=2)
    return G, partition_dendogram
def study_dendrogram(G, filename):
    """Print the number of communities per dendrogram level and plot the
    modularity of every level.

    When `filename` is truthy the figure is also saved under
    'drawings/<filename>'. Returns the dendrogram.
    """
    dendrogram = co.generate_dendrogram(G)
    n_levels = len(dendrogram)
    print("Dendrogram has {} levels".format(n_levels))

    levels = []
    modularities = []
    for level in range(n_levels):
        part = co.partition_at_level(dendrogram, level)
        n_communities = len(set(part.values()))
        print("Found {} communities at level {}".format(n_communities, level))
        levels.append(level)
        modularities.append(co.modularity(part, G))

    plt.plot(levels,
             modularities,
             linestyle='dotted',
             marker='o',
             markersize=8)
    plt.xlabel("l - Level")
    plt.ylabel("Q - Modularity")
    if filename:
        plt.savefig("drawings/" + filename)
    plt.show()
    return dendrogram
def identify_clusters(graph, louvain_level=-1):
    """
    Identify clusters in the given NetworkX Graph by Louvain partitioning.

    `louvain_level` selects the dendrogram level to cut at: 0 is the most
    granular partition and granularity decreases as the level grows.
    Negative values count down from the number of levels (-1 is the last
    level, i.e. the largest clusters) and never go below level 0. Values
    past the last level are reset to the last level.
    """
    dendrogram = community.generate_dendrogram(graph)
    n_levels = len(dendrogram)
    level = louvain_level
    if level < 0:
        level = max(0, n_levels + level)
    # Clamp to the last available level.
    level = min(level, n_levels - 1)
    return community.partition_at_level(dendrogram, level)
def get_community(weight):
    """
    Cluster the graph and detect communities.

    weight: name of the `graph_data` column used as the edge weight.

    Returns a DataFrame with an 'id' column (one row per node) and one
    '<weight>_label_<level>' column per dendrogram level holding the
    community of each node at that level.
    """
    FG = nx.Graph()
    FG.add_weighted_edges_from(graph_data[['from_id', 'to_id', weight]].values)
    result = pd.DataFrame({'id': list(FG.nodes)})
    print('node number: %s' % len(result))
    dendrogram = community.generate_dendrogram(FG)
    for level in range(len(dendrogram)):
        the_partition = community.partition_at_level(dendrogram, level)
        # Look labels up by node id instead of relying on the partition
        # dict being ordered like the graph's node list.
        result['%s_label_%s' % (weight, level)] = result['id'].map(the_partition)
    return result
def run_louvain(experiment_dir):
    """Write the level-0 Louvain communities to `<experiment_dir>/louvain.json`.

    The graph comes from `get_networkx_graph(experiment_dir)` and is made
    undirected before clustering. The JSON file holds a list of
    communities, ordered by community id, each a list of node ids.
    """
    g = get_networkx_graph(experiment_dir)
    g = g.to_undirected()
    d = community.generate_dendrogram(g)
    level_0 = community.partition_at_level(d, 0)

    # Group node ids by community in a single pass.
    members = {}
    for class_id, partition_num in level_0.items():
        members.setdefault(partition_num, []).append(class_id)

    # Include every community id; range(max(...)) skipped the last one.
    majors = [members[partition_num] for partition_num in sorted(members)]

    with open(os.path.join(experiment_dir, 'louvain.json'), 'w') as outfile:
        json.dump(majors, outfile, indent=4)
def add_cluster_labels_to_nodes(nodes_pdf, edges_pdf, weight_col='lift'):
    """
    Decorate nodes_pdf with one 'level_<n>_cluster' column per Louvain
    dendrogram level, marking the cluster each node belongs to.

    Edges are read from the 'from' / 'to' columns of edges_pdf and weighted
    by `weight_col`. The columns are added to nodes_pdf as a side effect;
    nothing is returned.
    """
    import networkx as nx

    weighted_edges = []
    for _, row in edges_pdf.iterrows():
        weighted_edges.append((row['from'], row['to'], row[weight_col]))

    G = nx.Graph()
    G.add_weighted_edges_from(weighted_edges)

    dendro = community_louvain.generate_dendrogram(G)
    for level, _ in enumerate(dendro):
        partition = community_louvain.partition_at_level(dendro, level)
        labels = [partition[node_id] for node_id in nodes_pdf['id']]
        nodes_pdf[f"level_{level}_cluster"] = labels
def extract_network_metrics(mdg, ts, team=True):
    """Compute network metrics of the graph returned by `extract_dpsg`.

    Args:
        mdg, ts, team: forwarded to `extract_dpsg`; the result must be a
            directed graph (in/out degrees are read from it).
        team: also selects the key prefix, 'full:' when true and 'user:'
            otherwise.

    Returns:
        dict keyed by '<prefix><metric>' with node/edge counts, density,
        betweenness and degree mappings (plain, 'count'- and
        'effort'-weighted) together with their averages, the Louvain
        partition and modularity of the undirected graph, and
        'avg_distance' (the largest average shortest path length over the
        connected components that have more than one edge). Averages are
        None for a graph without nodes; 'louvain_modularity' and
        'avg_distance' are None when they cannot be computed.
    """
    def _average(values_by_node):
        # Mean of the mapping's values; None when the mapping is empty.
        if not values_by_node:
            return None
        return (float(sum(values_by_node.values())) /
                float(len(values_by_node)))

    met = {}
    dsg = extract_dpsg(mdg, ts, team)
    pre = 'full:' if team else 'user:'

    met[pre + 'nodes_count'] = dsg.number_of_nodes()
    met[pre + 'edges_count'] = dsg.number_of_edges()
    met[pre + 'density'] = nx.density(dsg)

    for name, weight in (('betweenness', None),
                         ('betweenness_count', 'count'),
                         ('betweenness_effort', 'effort')):
        met[pre + name] = nx.betweenness_centrality(dsg, weight=weight)
        met[pre + 'avg_' + name] = _average(met[pre + name])

    # dict(...) accepts both the dicts of NetworkX 1.x and the degree views
    # of later releases.
    for name, degrees in (('in_degree', dsg.in_degree()),
                          ('out_degree', dsg.out_degree()),
                          ('degree', dsg.degree()),
                          ('degree_count', dsg.degree(weight='count')),
                          ('degree_effort', dsg.degree(weight='effort'))):
        met[pre + name] = dict(degrees)
        met[pre + 'avg_' + name] = _average(met[pre + name])

    usg = dsg.to_undirected()
    dendo = co.generate_dendrogram(usg)
    if len(dendo) > 0 and isinstance(dendo, list):
        partition = co.partition_at_level(dendo, len(dendo) - 1)
        met[pre + 'partitions'] = dict(partition)
        met[pre + 'louvain_modularity'] = co.modularity(partition, usg)
    else:
        met[pre + 'louvain_modularity'] = None

    # connected_component_subgraphs was removed from NetworkX; build the
    # component subgraphs explicitly.
    connected_components = (usg.subgraph(nodes)
                            for nodes in nx.connected_components(usg))
    shortest_paths = [nx.average_shortest_path_length(g)
                      for g in connected_components if g.size() > 1]
    if len(shortest_paths) > 0:
        met[pre + 'avg_distance'] = max(shortest_paths)
    else:
        met[pre + 'avg_distance'] = None
    return met
def test_nodes_stay_together(self):
    """
    Test that two nodes in the same community at one level stay in the
    same at higher level
    """
    g = nx.erdos_renyi_graph(500, 0.01)
    dendo = co.generate_dendrogram(g)
    n_levels = len(dendo)
    parts = {}
    for level in range(n_levels):
        parts[level] = co.partition_at_level(dendo, level)

    for level in range(n_levels - 1):
        current, above = parts[level], parts[level + 1]
        for com in set(current.values()):
            members = (node for node, comnode in current.items()
                       if comnode == com)
            comhigher = [above[node] for node in members]
            # All members must share one community at the next level.
            self.assertEqual(len(set(comhigher)), 1)
def louvain(G):
    """Run Louvain on G (resolution 7, randomized, 'weight' edge attribute)
    and print the last-level partition, then the same partition grouped as
    community -> list of nodes. Nothing is returned.
    """
    dendo = lvcm.generate_dendrogram(graph=G,
                                     weight='weight',
                                     resolution=7.,
                                     randomize=True)
    last_level = len(dendo) - 1
    partition = lvcm.partition_at_level(dendo, last_level)
    print(partition)

    out = defaultdict(list)
    for node in partition:
        out[partition[node]].append(node)
    print(out)
def add_louvain_communities(graph, all_levels=False, random_state=None):
    """Add Louvain community nodes to `graph` and link members to them.

    Communities are detected on an undirected copy of `graph`. Each
    community becomes a new node of `graph` and every member gets a
    'parent' attribute naming its community node.

    Args:
        graph: graph to decorate (modified in place and returned).
        all_levels: when true, add the communities of every dendrogram
            level; the last level is tagged NodeType='community' and all
            lower levels NodeType='subcommunity'. Otherwise only the best
            partition is added, tagged NodeType='community'.
        random_state: forwarded to the Louvain functions.

    Returns:
        The same `graph` object.
    """
    # Louvain algorithm only deals with undirected graphs
    graph_communities = graph.copy().to_undirected()

    if not all_levels:
        communities = best_partition(graph_communities,
                                     random_state=random_state)
        # Compound nodes to add to hold communities.
        graph.add_nodes_from(set(communities.values()), NodeType='community')
        nx.set_node_attributes(graph, communities, 'parent')
        return graph

    dendrogram = generate_dendrogram(graph_communities,
                                     random_state=random_state)
    top_level = len(dendrogram) - 1
    for level, child_parent in enumerate(dendrogram):
        if level == 0:
            # Level 0 maps the original nodes to their first clusters.
            relabelled = dict(child_parent)
        elif level == 1:
            # Keys are the (unprefixed) level-0 clusters; values are
            # prefixed to tell them apart from the level-0 cluster names.
            relabelled = {
                key: '{0}_{1}'.format(1, value)
                for key, value in child_parent.items()
            }
        else:
            relabelled = {
                '{0}_{1}'.format(level - 1, key): '{0}_{1}'.format(level, value)
                for key, value in child_parent.items()
            }
        # Only the last level holds the final communities.
        node_type = 'community' if level == top_level else 'subcommunity'
        graph.add_nodes_from(set(relabelled.values()), NodeType=node_type)
        nx.set_node_attributes(graph, relabelled, 'parent')
    return graph
def test_mode():
    """Draw a small hard-coded graph, then draw it again with one colour
    per Louvain community, and return a dendrogram of the graph.
    """
    G = nx.Graph()
    G.add_edges_from([(1, 2), (1, 3), (2, 4), (2, 3), (4, 5), (5, 3), (5, 8),
                      (8, 9), (9, 7), (7, 6), (6, 3), (2, 5), (8, 7)])
    pos = nx.spring_layout(G)
    nx.draw_networkx_nodes(G, pos, label=True)
    partition = community.best_partition(G)
    nx.draw_networkx_edges(G, pos)
    nx.draw_networkx_labels(G, pos)
    plt.show()

    # Drawing nodes, one colour per community.
    print("drawing nodes of communites...")
    pos = nx.spring_layout(G)
    # Colours are named 'C1', 'C2', ... (numbering starts at 1).
    for counter, com in enumerate(set(partition.values()), start=1):
        list_nodes = [node for node in partition if partition[node] == com]
        nx.draw_networkx_nodes(G,
                               pos,
                               list_nodes,
                               node_color='C' + str(counter))
    print("drawing community detection finished!")

    # Drawing edges.
    print("drawing network edges...")
    nx.draw_networkx_edges(G, pos)
    nx.draw_networkx_labels(G, pos)
    plt.show()
    return community.generate_dendrogram(G)
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    """Run Louvain on an adjacency matrix and list the communities.

    Args:
        adj_matrix: adjacency matrix handed to NetworkX to build the graph.
        level: how many levels below the top of the dendrogram to cut
            (0 is the top level).
        return_c_graph: when true, also return the adjacency matrix of the
            community-level (induced) graph.

    Returns:
        (community_list, c_level_graph, dendro, inv_dendro):
        community_list holds one list of nodes per community, ordered by
        community id; c_level_graph is the induced graph's adjacency matrix
        or None; dendro is the raw dendrogram; inv_dendro holds, per level,
        a dict mapping each community to the list of its children.

    Raises:
        Exception: if `level` is deeper than the dendrogram.
    """
    nx_G = nx.from_numpy_array(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False, random_state=0)  # Maybe set randomize True

    max_level = len(dendro) - 1
    level = max_level - level
    if level < 0:
        raise Exception("The given Level is too deep. The maximum is: " +
                        str(max_level))
    communities = louvain.partition_at_level(dendro, level)

    # Group nodes by community id. The previous code derived the number of
    # communities from the *node* holding the largest id, not from the id.
    grouped = {}
    for node, com in communities.items():
        grouped.setdefault(com, []).append(node)
    community_list = [grouped[com] for com in sorted(grouped)]

    c_level_graph = None
    if return_c_graph:
        community_level_G = louvain.induced_graph(communities, nx_G)
        c_level_graph = nx.adjacency_matrix(community_level_G)

    # Invert every level: community -> list of its children.
    inv_dendro = []
    for dct in dendro:
        inv_dct = {}
        for k, v in dct.items():
            inv_dct.setdefault(v, []).append(k)
        inv_dendro.append(inv_dct)
    return community_list, c_level_graph, dendro, inv_dendro
def extract_louvain_modularity(g):
    """Compute the Louvain partition and modularity of `g`.

    Isolated nodes are dropped from a copy of `g` before clustering, so
    they do not appear in the result.

    Returns:
        dict with 'partitions' (node -> community at the last dendrogram
        level) and 'modularity'; both are None when no dendrogram could be
        built.
    """
    met = {}
    usg = g.copy()
    # Materialize the isolates first: nx.isolates may be lazy, and removing
    # nodes while it is being consumed would mutate the graph mid-iteration.
    isolated = list(nx.isolates(usg))
    usg.remove_nodes_from(isolated)

    dendo = co.generate_dendrogram(usg)
    if len(dendo) > 0 and isinstance(dendo, list):
        partition = co.partition_at_level(dendo, len(dendo) - 1)
        met['partitions'] = dict(partition)
        met['modularity'] = co.modularity(partition, usg)
    else:
        met['partitions'] = None
        met['modularity'] = None
    return met
def preprocess():
    """Partition the weight graph with Louvain and save every level.

    The graph is loaded from 'f_data/weight.mat'; for each dendrogram level
    the number of communities is printed and the partition is saved through
    `saveResult` under 'f_data/partition<level>'.
    """
    data = sio.loadmat('f_data/phishing_2013_filter.mat')
    # Kept from the original pipeline; not used by the active code path.
    phish_data = data['phish']
    prefix_data = data['networks']

    G = genGraphFromFile('f_data/weight.mat')
    dendo = community.generate_dendrogram(G)

    # Prefix of the per-level result files.
    filename = "f_data/partition"
    for level in range(len(dendo)):
        partition = community.partition_at_level(dendo, level)
        print('size', len(set(partition.values())))
        saveResult(filename + str(level), partition)
def predict(self):  # pylint:disable=E0202
    """Predict using community structure

    If two nodes belong to the same community, they are predicted to form
    a link. This uses the Louvain algorithm, which determines communities
    at different granularity levels: the finer grained the community, the
    higher the resulting score.

    This needs the python-louvain package. Install linkpred as follows:

    $ pip install linkpred[community]

    """
    try:
        import community
    except ImportError:
        raise ImportError("Module 'community' could not be found. "
                          "Please install linkpred as follows:\n"
                          "$ pip install linkpred[community]")

    res = Scoresheet()
    dendogram = community.generate_dendrogram(self.G)
    n_levels = len(dendogram)

    for level in range(n_levels):
        partition = community.partition_at_level(dendogram, level)
        # Lower level, smaller communities, larger weight.
        weight = n_levels - level

        members_of = defaultdict(list)
        for node, com in partition.items():
            members_of[com].append(node)

        for members in members_of.values():
            for u, v in all_pairs(members):
                if self.eligible(u, v):
                    res[(u, v)] += weight
    return res
def gen_clusters(edges_file, resolution=dflt_resolution):
    """Cluster a weighted edge list with Louvain and print every level.

    Args:
        edges_file: path of a weighted edge list readable by NetworkX.
        resolution: Louvain resolution passed to `generate_dendrogram`.

    For each dendrogram level the partition and its modularity are printed,
    followed by the sorted node ids of every community and the group names
    resolved through `gids2names` (loaded from 'data/groups.txt').
    """
    with open(edges_file, "rb") as fp:
        G = nx.read_weighted_edgelist(fp)

    # Honour the caller's resolution instead of a hard-coded value.
    dendrogram = community.generate_dendrogram(G, resolution=resolution)
    len_d = len(dendrogram)
    print("{} items in dendrogram".format(len_d))
    gids2names.load_groups_file("data/groups.txt")

    for level in range(len_d):
        print()
        partition = community.partition_at_level(dendrogram, level)
        modularity = community.modularity(partition, G)
        print("partition at level {} is\n{}".format(level, pformat(partition)))
        print("modularity at level {} is {}".format(level, modularity))

        # Group nodes by community in a single pass.
        members = {}
        for node, com in partition.items():
            members.setdefault(com, []).append(node)

        for com in set(partition.values()):
            list_nodes = sorted(members[com])
            print("nodes: {}".format(json.dumps(list_nodes)))
            print(" groups:")
            for gid, name in gids2names.generate_group_names(
                    group_ids_list=list_nodes):
                print(" {} {}".format(gid, name))
def _pickle_paths(screen_name):
    """Return the (json, debug) pickle cache paths for *screen_name*."""
    abs_pkl_path = os.path.join(os.path.dirname(__file__), PKL_PATH)
    prefix = abs_pkl_path + str(screen_name) + '.'
    return prefix + PKL_FILE_EXT, prefix + DBG_FILE_EXIT


def load(screen_name=None, user_id=None, force_db_update=False,
         force_twitter_update=False, debug=False):
    '''
    Main entry point into the gravitty module.

    Intended use: import gravitty and call
    gravitty.load('<your_screen_name>'). See the readme at
    github.com/ericjeske/gravitty for mandatory setup instructions and api
    requirements.

    The function tries to load data from cache sources (pickles, then
    mongoDB) before using twitter's api. Using several twitter api keys is
    suggested to avoid rate limiting.

    Two pickles are written per user: one with the debug tuple, one with
    the community json. Later calls for the same screen name reuse them
    unless force_db_update or force_twitter_update is set.

    Args:
        screen_name: twitter screen name of the target user.
        user_id: twitter user id of the target user; used when no screen
            name is given.
        force_db_update: if True, ignore existing pickles.
        force_twitter_update: if True, ignore existing pickles and pass
            force=True to the data-fetching helpers.
        debug: if True, return the intermediate objects as well.

    Returns:
        The community json object meant for d3.js, or, when debug is True,
        the tuple (raw_df, df, df_similarity, dendrogram, data,
        community_graph, community_json).

    Raises:
        Exception: 'Please enter an id or name' when neither identifier is
            given, 'DBError' when the mongo connection fails, and
            'TargetError' when no data could be obtained for the user.
    '''
    if screen_name is None and user_id is None:
        raise Exception('Please enter an id or name')

    # Assume that if screen_name was not provided (only user id) then a
    # pickle has not been created. Either force flag bypasses the pickles.
    use_pickles = not force_twitter_update and not force_db_update
    if screen_name is not None and use_pickles:
        sn_file, sn_file_debug = _pickle_paths(screen_name)

        # NOTE(review): pickle.load executes whatever the file contains;
        # these cache files must only ever be written by this module.
        if debug and os.path.isfile(sn_file_debug):
            with open(sn_file_debug, 'rb') as cache_file:
                return pickle.load(cache_file)

        if os.path.isfile(sn_file):
            with open(sn_file, 'rb') as cache_file:
                return pickle.load(cache_file)

    # Use api credentials from files located in the API_PATH.
    ABS_API_PATH = os.path.join(os.path.dirname(__file__), API_PATH)
    apis = oauth_login(ABS_API_PATH)

    # Try to start up a mongo database connection to cache data in.
    try:
        conn = pymongo.MongoClient("localhost", 27017)
    except pymongo.errors.ConnectionFailure:
        print('Please run mongod and re-run program')
        raise Exception('DBError')

    # Everything that needs the database runs inside try/finally so the
    # connection is released on error paths too.
    try:
        db = conn[DB_NAME]

        # Get the target user's data from either the screen_name or user_id.
        user_data = get_user_data(db, apis[0], name=screen_name,
                                  uid=user_id, force=force_twitter_update)

        # None signals that the user's data could not be accessed.
        if user_data is None:
            print('Was unable to access data for %s / %s'
                  % (screen_name, user_id))
            raise Exception('TargetError')

        user_info, user_tweets, followers, following, user_lists = user_data

        # Fetch the same information for each of the target's followers.
        raw_df = get_follower_data(db, apis, followers,
                                   force=force_twitter_update)

        # Filter the raw dataframe, then parse it to extract the features.
        df = parse_dataframe(filter_dataframe(raw_df))

        # Pairwise user similarity computed from the parsed features.
        df_similarity = make_similarity_dataframe(df)

        # Graph built from the users and their similarity scores.
        graph = make_graph(df, df_similarity)

        # The dendrogram is used as a list of dicts, one per level.
        dendrogram = generate_dendrogram(graph)

        # Add a final level that maps every community of the last level
        # into one community: all are followers of the same user.
        dendrogram.append({k: 0 for k in dendrogram[-1].values()})

        # Community assignment per level plus the modularity values.
        df, modularity = get_community_assignment(df, graph, dendrogram)

        num_levels = len(dendrogram)

        # Per-community analytics for every level of the dendrogram.
        data = get_community_analytics(df, graph, num_levels,
                                       community_modularity=modularity)

        # Turn the user ids in these two fields into screen names.
        data = get_screen_names(data, 'mentioned', df, db, apis[0])
        data = get_screen_names(data, 'most_connected', df, db, apis[0])
    finally:
        # The database connection is no longer needed.
        conn.close()

    # Graph of communities built from the analytics and the dendrogram.
    community_graph = create_community_graph(data, dendrogram)

    # Json representation of that graph for consumption by d3.js.
    community_json = create_community_json(community_graph, user_info)

    # Just in case we don't have the screen name, grab it.
    if screen_name is None:
        screen_name = user_info['screen_name']

    debug_bundle = (raw_df, df, df_similarity, dendrogram, data,
                    community_graph, community_json)

    # Pickle the objects for reuse; `with` guarantees flush and close.
    sn_file, sn_file_debug = _pickle_paths(screen_name)
    with open(sn_file_debug, 'wb') as cache_file:
        pickle.dump(debug_bundle, cache_file)
    with open(sn_file, 'wb') as cache_file:
        pickle.dump(community_json, cache_file)

    # If debug is true, return the precursor objects along with the json.
    if debug:
        return debug_bundle

    # Otherwise return the json object.
    return community_json
# Connected components of the 4-core and of the max_k-core of `graph`,
# each list ordered from the largest component to the smallest.
k4_cores = sorted(
    nx.connected_component_subgraphs(nx.k_core(graph, k=4)),
    key=nx.number_of_nodes, reverse=True)
kmax_cores = sorted(
    nx.connected_component_subgraphs(nx.k_core(graph, k=max_k)),
    key=nx.number_of_nodes, reverse=True)

# Single-argument print() emits the same text on Python 2 and 3.
print('k4 cores sizes: %s' % ([c.number_of_nodes() for c in k4_cores],))
print('kmax cores sizes: %s' % ([c.number_of_nodes() for c in kmax_cores],))

# Modularity of each family of cores taken as a set of communities.
k4_mod = modularity(k4_cores, graph)
kmax_mod = modularity(kmax_cores, graph)
print('k4 mod %s' % (k4_mod,))
print('kmax mod %s' % (kmax_mod,))

# wcc1 is evaluated on the largest component of each family only.
k4_wcc = wcc1(k4_cores[0], graph)
kmax_wcc = wcc1(kmax_cores[0], graph)
print('k4 wcc %s' % (k4_wcc,))
print('kmax wcc %s' % (kmax_wcc,))

# For every Louvain level record [number of communities, modularity].
dendro = comm.generate_dendrogram(graph)
louvain_steps = []
for level in range(len(dendro)):
    partition = comm.partition_at_level(dendro, level)

    # Collect the members of each community, visiting nodes in sorted order.
    clusters = {}
    for node, cluster_id in sorted(partition.items()):
        if cluster_id not in clusters:
            clusters[cluster_id] = []
        clusters[cluster_id].append(node)

    # Every cluster holds at least one node, so none has to be filtered out.
    communities = [graph.subgraph(members) for members in clusters.values()]

    n_communities = len(set(partition.values()))
    louvain_steps.append([n_communities, modularity(communities, graph)])
# set input variables inputFile = str(sys.argv[1]) doResolution = 1. weighted = False if(len(sys.argv) > 2): doResolution = float(sys.argv[2]) sys.stderr.write("Using resolution " + str(doResolution) + ".\n") # read data from edges input file G = networkx.Graph() # create a new undirected graph G = networkx.read_edgelist(inputFile, nodetype=int, data=(('weight',int))) # read as int-weighted # G = networkx.read_edgelist(inputFile, nodetype=int) # read as unweighted sys.stderr.write("Done reading.\n") # do community detection and get dendrograph of communities dendo = community.generate_dendrogram(G, part_init=None, resolution=doResolution, weight='weight') # store communities at different levels parts = {} for level in range(0, len(dendo)): parts[level] = community.partition_at_level(dendo, level) levels = len(dendo) # just do plain community detection instead of nested variant #levels = 1 #parts[0] = community.best_partition(G) # find communities # output header to stdout sys.stdout.write("Id") communitySize = {} for level in range(0, levels):