def _fit(self, parameters):
    """Filter the base graph and compute the candidate clusters for it.

    A copy of ``_base_graph[0]`` is filtered with this object's node, edge
    and semantic weight thresholds and stored in ``self.graph``. The
    candidates stored in ``self.potential_clusters`` are either the maximal
    cliques with at least three nodes (``self.communities == 0``) or the
    3-clique communities of the filtered graph.

    Args:
        parameters: Indexable parameter sequence. Everything after its first
            element is compared with the previously stored parameters to
            decide whether the cached clusters are passed as ``cliques``.
    """
    global _current_clusters
    self.graph = _filter_graph(
        _base_graph[0].copy(),
        node_weight_threshold=self.node_weight_threshold,
        edge_weight_threshold=self.edge_weight_threshold,
        sem_weight_threshold=self.sem_weight_threshold)
    if self.communities == 0:
        # Plain maximal cliques; anything smaller than a triangle is dropped.
        self.potential_clusters = [
            sorted(clq) for clq in nx.find_cliques(self.graph)
            if len(clq) >= 3
        ]
    else:
        if _current_clusters[1][1:] == parameters[1:]:
            # Same trailing parameters as the previous fit: feed the cached
            # clusters to the percolation step instead of recomputing cliques.
            self.potential_clusters = [
                sorted(clq) for clq in nx.k_clique_communities(
                    self.graph, 3, cliques=_current_clusters[0])
            ]
        else:
            self.potential_clusters = [
                sorted(clq)
                for clq in nx.k_clique_communities(self.graph, 3)
            ]
    # NOTE(review): assumed to run for both branches so that a clique-only
    # fit can seed a later community fit -- confirm against the original
    # indentation.
    _current_clusters[0] = self.potential_clusters
    _current_clusters[1] = parameters
def test_zachary():
    """Check k-clique communities of the karate club graph for k = 2..6."""
    karate = nx.karate_club_graph()
    expected_by_k = [
        # clique percolation with k=2 is just connected components
        (2, {frozenset(karate.nodes())}),
        (3, {
            frozenset([
                0, 1, 2, 3, 7, 8, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22,
                23, 26, 27, 28, 29, 30, 31, 32, 33
            ]),
            frozenset([0, 4, 5, 6, 10, 16]),
            frozenset([24, 25, 31]),
        }),
        (4, {
            frozenset([0, 1, 2, 3, 7, 13]),
            frozenset([8, 32, 30, 33]),
            frozenset([32, 33, 29, 23]),
        }),
        (5, {frozenset([0, 1, 2, 3, 7, 13])}),
        (6, set()),
    ]
    for k, expected in expected_by_k:
        assert set(k_clique_communities(karate, k)) == expected
def test_overlaping_K5():
    """Two 5-cliques sharing three nodes: merged at k=4, separate at k=5."""
    G = nx.Graph()
    G.add_edges_from(combinations(range(5), 2))  # Add a five clique
    G.add_edges_from(combinations(range(2, 7), 2))  # Add another five clique

    # Sharing three nodes (k - 1 for k=4) is enough for the cliques to merge.
    c = list(nx.k_clique_communities(G, 4))
    assert_equal(c, [frozenset([0, 1, 2, 3, 4, 5, 6])])

    # For k=5 the cliques would need four shared nodes to merge, so each one
    # stays a community of its own (same rule as for fully isolated cliques).
    c = set(nx.k_clique_communities(G, 5))
    assert_equal(c, set([frozenset(range(5)), frozenset(range(2, 7))]))
def findClaimID(self, cliqueSize, density, mean=0.1, std=0.05):
    """Pick a community as fraud-ring subset and score nodes per community.

    Runs k-clique community detection on ``self.G`` three times: once to
    choose ``self.subset`` (only when it is still ``None``), once to compute
    a claim/non-claim ratio per community, and once to rescale the stored
    ``fraudScore`` values to integers relative to the largest ratio.

    Args:
        cliqueSize: ``k`` passed to ``nx.k_clique_communities``.
        density: Forwarded to ``self.fraudRingModifier``.
        mean: Centre of the accepted community-size window, as a fraction
            of the number of nodes in ``self.G``.
        std: Half-width of that window, as a fraction of the node count.
    """
    # STEP1: allocate subnets for fraud rings
    if self.subset is None:
        c = nx.k_clique_communities(self.G, cliqueSize)
        N = len(self.G)
        nodeList = None
        for count, n in enumerate(list(c)):
            print 'Community = ', count, ' Size = ', len(n)
            if nodeList is None:
                # The first community is the fallback choice.
                nodeList = n
            elif len(n) > (mean - std) * N and len(n) < (mean + std) * N:
                # A later community inside the size window replaces it.
                nodeList = n
        # NOTE(review): raises TypeError when no community was found
        # (nodeList is still None) -- confirm whether that can happen.
        self.subset = list(nodeList)
    if self.subset:
        self.fraudRingModifier(density=density)
    # STEP2: calculate network KPIs
    c = nx.k_clique_communities(self.G, cliqueSize)
    maximum = 0.0
    for count, n in enumerate(list(c)):
        N_claim = 0.
        N_part = 0.
        for NI in n:
            # Nodes whose label starts with 'ClaimID_' count as claims.
            if self.G.node[NI]['label'].split('_')[0] == 'ClaimID':
                N_claim += 1.0
            else:
                N_part += 1.0
        for NI in n:
            # NOTE(review): ZeroDivisionError when a community contains only
            # ClaimID nodes (N_part == 0).
            ratio = N_claim / N_part
            self.G.node[NI]['modularityClass'] = count + 1
            self.G.node[NI]['fraudScore'] = ratio
            if maximum < ratio:
                maximum = ratio
    # STEP3: assign fraud scores
    c = nx.k_clique_communities(self.G, cliqueSize)
    for count, n in enumerate(list(c)):
        for NI in n:
            # NOTE(review): divides by zero when every ratio is 0; a node in
            # several communities is rescaled once per community.
            self.G.node[NI]['fraudScore'] /= maximum
            self.G.node[NI]['fraudScore'] *= 100
            self.G.node[NI]['fraudScore'] = int(
                self.G.node[NI]['fraudScore'])
def report_communities(graph: networkx.Graph):
    """Print the ten largest 3-clique communities and tag the graph nodes.

    Each node is mapped to the 1-based id of the last community that
    contains it; that id is also stored as the node's ``community_id``
    attribute.
    """
    node_to_community = {}
    for index, members in enumerate(networkx.k_clique_communities(graph, 3)):
        for member in members:
            node_to_community[member] = index + 1

    # Group actors from same communities together
    actors_by_community = {}
    for actor, cid in node_to_community.items():
        actors_by_community.setdefault(cid, []).append(actor)

    # Largest communities first
    ranked = sorted(actors_by_community.items(),
                    key=lambda pair: len(pair[1]),
                    reverse=True)

    separator = "=" * 80
    print(separator)
    print("COMMUNITIES:")
    for cid, actors in ranked[:10]:
        print("ID {}, {} actors: {}".format(cid, len(actors),
                                            ", ".join(actors)))
    print(separator)

    # Add as attribute to graph
    for actor, cid in node_to_community.items():
        graph.node[actor]['community_id'] = cid
def output_graph_stats(g):
    """Write clique-percolation and general NetworkX statistics to a file.

    The report goes to the path in ``OUTPUT_STATS`` and contains the graph
    summary, transitivity, nodes with clustering coefficient 1, average
    clustering, degree histogram, the five nodes with the highest degree
    centrality, the 4-clique communities and all maximal cliques with at
    least three nodes.

    Args:
        g: Graph to describe. Node identifiers are concatenated with
            strings when written, so they must be strings.
    """
    # The context manager closes the file even if a statistic raises.
    with open(OUTPUT_STATS, 'w') as stat_file:
        stat_file.write("NOTE: graph is treated as an unweighted graph" +
                        "\n\n")
        stat_file.write(str(nx.info(g)) + "\n\n")
        stat_file.write("TRANSITIVITY: " + str(nx.transitivity(g)) + "\n\n")

        clust_coeffs = nx.clustering(g)
        stat_file.write("NODES WITH CLUST COEFF = 1: " + "\n")
        for node in clust_coeffs:
            if clust_coeffs[node] == 1.0:
                stat_file.write(node + " " + str(g.neighbors(node)) + "\n")
        stat_file.write("AVG CLUSTERING COEFFICIENT: " +
                        str(nx.average_clustering(g)) + "\n\n")

        stat_file.write("DEGREE HISTOGRAM: " + str(nx.degree_histogram(g)) +
                        "\n\n")

        stat_file.write("NODES WITH HIGHEST DEGREE CENTRALITY: " + "\n")
        top_by_degree = sorted(nx.degree_centrality(g).items(),
                               key=operator.itemgetter(1),
                               reverse=True)[:5]
        stat_file.write(str(top_by_degree) + "\n\n")

        stat_file.write("4-CLIQUE COMMUNITIES (clique percolation): " + "\n")
        for clique in nx.k_clique_communities(g, 4):
            stat_file.write(str(clique) + "\n")

        stat_file.write("\nMAXIMAL CLIQUES: " + "\n")
        for clique in nx.find_cliques(g):
            if len(clique) >= 3:
                stat_file.write(str(clique) + "\n")
def all_users_group_evolution(self):
    """Print how much the top communities overlap between time slots.

    For each of the six ``all_posts_s<i>`` graphs the ten largest 3-clique
    communities are kept. For every pair of consecutive slots, the
    community at rank ``j`` of the earlier slot is compared with the
    community at rank ``j`` of the later slot, and the integer percentage
    of the earlier community's members that are shared is printed as one
    row per slot pair.
    """
    slots = []
    for i in range(1, 7):
        G = self._graph_from_cursor('all_posts_s%d' % i)
        ranked = sorted(nx.k_clique_communities(G, 3), key=len, reverse=True)
        slots.append(ranked[:10])

    cover = []
    for previous, current in zip(slots, slots[1:]):
        # zip() stops at the shorter list, so a slot with fewer than ten
        # communities yields a shorter row instead of an IndexError.
        row = []
        for before, after in zip(previous, current):
            shared = len(set(before) & set(after))
            # Floor division keeps the integer percentages on Python 2 and 3.
            row.append(shared * 100 // len(before))
        cover.append(row)

    for row in cover:
        print(row)
def user_clique(user):
    """Return ``(user, circles)`` predicted from k-clique communities.

    Falls back to a single circle holding the ``maxrank(user)`` result when
    the user is in the hard-coded skip list, the graph has more than
    ``tooManyNodesThreshold`` nodes, or no community reaches
    ``tooLittleFriendsInCircleThreshold`` members.
    """
    G = get_graph(user)
    c = maxrank(user)
    print(str(len(G.nodes())) + " nodes for user " + str(user))

    fallback = (user, [[c]])
    if user in [345, 0, 21869, 18844]:
        return fallback

    # do not calculate for large graphs (it takes too long)
    if len(G.nodes()) > tooManyNodesThreshold:
        return fallback

    # keep only the relatively large communities
    circles = [
        list(members)
        for members in list(nx.k_clique_communities(G, cliqueSize))
        if len(members) >= tooLittleFriendsInCircleThreshold
    ]

    # if no prediction was created, use the max pagerank friend
    if not circles:
        return fallback
    return (user, circles)
def plotUnweightedCommunities(G, k_clique, n_nodes, iw):
    """Draw the k-clique communities of ``G`` and save the figure as PNG.

    Communities are coloured by cycling through the ``colors`` palette
    defined outside this function. ``n_nodes`` is accepted but not used;
    ``iw`` and ``k_clique`` only appear in the output file name.
    """
    maximal_cliques = nx.find_cliques(G)
    communities = list(
        nx.k_clique_communities(G, k_clique, cliques=maximal_cliques))
    print(len(communities))

    pos = nx.graphviz_layout(G)  # positions for all nodes
    plt.figure(figsize=(12, 12))

    for index, members in enumerate(communities):
        nx.draw_networkx_nodes(G,
                               pos,
                               nodelist=list(members),
                               node_color=colors[index % len(colors)])
    nx.draw_networkx_edges(G, pos, width=0.5)

    plt.axis('off')
    target = ("./communities/unweighted_" + "comm_" + "w" + str(iw) + "k" +
              str(k_clique) + ".png")
    plt.savefig(target)  # save as png
    plt.close()
def plotWeightedCommunities(G, W_lim, k_clique, n_nodes):
    """Drop light edges, then draw and save the k-clique communities.

    Every edge between two distinct nodes in ``0 .. n_nodes - 1`` whose
    ``weight`` is below ``W_lim`` is removed from ``G`` in place. The
    communities of the remaining graph are drawn with colours taken
    cyclically from the ``colors`` palette defined outside this function.

    Args:
        G: Weighted graph with integer nodes; modified in place.
        W_lim: Minimum edge weight to keep.
        k_clique: ``k`` for the clique percolation.
        n_nodes: Number of nodes to scan for light edges.
    """
    for i in range(n_nodes):
        for j in range(i + 1, n_nodes):
            print(i, j)
            # Pairs without an edge are skipped instead of raising KeyError.
            if G.has_edge(i, j) and G[i][j]['weight'] < W_lim:
                G.remove_edge(i, j)

    cls = nx.find_cliques(G)
    communities = list(nx.k_clique_communities(G, k_clique, cliques=cls))
    print(len(communities))

    pos = nx.graphviz_layout(G)  # positions for all nodes
    plt.figure(figsize=(12, 12))
    for index, members in enumerate(communities):
        # Cycle through the palette so extra communities do not overflow it.
        nx.draw_networkx_nodes(G,
                               pos,
                               nodelist=list(members),
                               node_color=colors[index % len(colors)])
    nx.draw_networkx_edges(G, pos, width=0.5)

    plt.axis('off')
    plt.savefig("comm_w_" + str(W_lim) + "k" + str(k_clique) +
                ".png")  # save as png
    plt.close()
def singleDetection(twitter_api, group):
    """Build the mutual-follow graph of one group and find its communities.

    An edge joins a user to every group member that appears in both the
    user's friends and followers lists. Clique percolation is then tried
    with k = 3, 4, 5; the result for the largest k that still yields
    communities is kept.

    Returns:
        The graph's nodes, its edges, and the communities as lists.
    """
    # drop repeated user ids
    group = list(set(group))
    G = nx.Graph()

    # construct the graph for the group by finding all the edges
    for user_id in group:
        friends_ids, followers_ids = get_friends_followers_ids(
            twitter_api, user_id=user_id)
        friends = [str(friend_id) for friend_id in friends_ids]
        followers = [str(follower_id) for follower_id in followers_ids]
        # group members connected to the current user in both directions
        mutual = list(set(friends) & set(group) & set(followers))
        G.add_edges_from([(str(user_id), other) for other in mutual])

    # find communities using CPM
    c = []
    for size in range(3, 6):
        found = list(nx.k_clique_communities(G, size))
        if len(found) == 0:
            break
        # communities found with a larger k replace the previous ones
        c = [list(members) for members in found]

    return G.nodes(), G.edges(), c
def cliqueData(self, cnumber, data, header, sg, k):
    """Append k-clique community features to every row of ``data``.

    Five values are appended per row, and five matching names to
    ``header``: size, edge count and density of the largest community the
    node belongs to, then a minimum-edges value and the ratio of the edge
    count to that minimum (both 0 when the density is not positive).

    Returns:
        The same ``data`` and ``header`` objects, extended in place.
    """
    prefix = str(k)

    for key in data:
        data[key] += [0, 0, 0]

    header += [
        prefix + '_clique_size', prefix + '_clique_edges',
        prefix + '_clique_density'
    ]
    for members in list(nx.k_clique_communities(sg, k)):
        edge_count = self.edgesCount(list(members))
        size = len(members)
        for node in members:
            row = data[node]
            # Only the largest community seen so far is recorded per node.
            if size > row[-3]:
                row[-3] = size
                row[-2] = edge_count
                row[-1] = edge_count / (size * size - size)

    header += [prefix + '_min_edges', prefix + '_min_edges_ratio']
    for key in data:
        row = data[key]
        size, edge_count, density = row[-3], row[-2], row[-1]
        if density > 0:
            min_edges = np.ceil(size * cnumber[key] / 2)
            row.append(min_edges)
            # Ratio of the community's edge count to the value just added.
            row.append(edge_count / min_edges)
        else:
            row += [0, 0]

    return data, header
def k_comm(Gn):
    """Return the 3-clique communities of ``Gn`` as lists of ints."""
    communities = list(nx.k_clique_communities(Gn, 3))  # k=3
    return [[int(node) for node in members] for members in communities]
def plotWeightedCommunities(G, W_lim, k_clique, n_nodes):
    """Drop light edges, then draw and save the labelled communities.

    Every edge between two distinct nodes in ``0 .. n_nodes - 1`` whose
    ``weight`` is below ``W_lim`` is removed from ``G`` in place. The
    k-clique communities of the remaining graph are drawn with node labels
    and colours taken cyclically from the ``colors`` palette defined
    outside this function.

    Args:
        G: Weighted graph with integer nodes; modified in place.
        W_lim: Minimum edge weight to keep.
        k_clique: ``k`` for the clique percolation.
        n_nodes: Number of nodes to scan for light edges.
    """
    for i in range(n_nodes):
        for j in range(i + 1, n_nodes):
            # Pairs without an edge are skipped instead of raising KeyError.
            if G.has_edge(i, j) and G[i][j]['weight'] < W_lim:
                G.remove_edge(i, j)

    cls = nx.find_cliques(G)
    communities = list(nx.k_clique_communities(G, k_clique, cliques=cls))
    print(len(communities))

    pos = nx.graphviz_layout(G)  # positions for all nodes
    plt.figure(figsize=(12, 12))
    for index, members in enumerate(communities):
        # Cycle through the palette so extra communities do not overflow it.
        nx.draw_networkx_nodes(G,
                               pos,
                               nodelist=list(members),
                               node_color=colors[index % len(colors)])
    nx.draw_networkx_edges(G, pos, width=0.5)

    # labels
    nx.draw_networkx_labels(G, pos, font_size=10, font_family='sans-serif')

    plt.axis('off')
    plt.savefig("comm_w_" + str(W_lim) + "k" + str(k_clique) +
                ".png")  # save as png
    plt.close()
def popular_group_centralities():
    """Describe centralities inside the largest 3-clique community.

    Reads the slot-5 graph, restricts it to its largest community, prints
    summary statistics for degree, PageRank, closeness and betweenness
    centrality, and saves histograms of the first two.
    """
    path = '../sna/all_posts_s5.graphml'
    graph = nx.read_graphml(path)

    communities = list(nx.k_clique_communities(graph, 3))
    largest = sorted(communities, key=len, reverse=True)[0]
    graph = graph.subgraph(largest)

    measures = (
        nx.degree_centrality,
        nx.pagerank,
        nx.closeness_centrality,
        nx.betweenness_centrality,
    )
    series = [pandas.Series(measure(graph)) for measure in measures]

    for values in series:
        print(values.describe())

    sd, sp = series[0], series[1]
    sd.plot.hist(bins=100).get_figure().savefig('../sna/pg_degree.png')
    sp.plot.hist(bins=100).get_figure().savefig('../sna/pg_pagerank.png')
def plotUnweightedCommunities(G, k_clique, n_nodes, iw):
    """Draw the labelled k-clique communities of ``G`` and save them as PNG.

    Communities are coloured by cycling through the ``colors`` palette
    defined outside this function. ``n_nodes`` is accepted but not used;
    ``iw`` and ``k_clique`` only appear in the output file name.
    """
    maximal_cliques = nx.find_cliques(G)
    communities = list(
        nx.k_clique_communities(G, k_clique, cliques=maximal_cliques))
    print(len(communities))

    pos = nx.graphviz_layout(G)  # positions for all nodes
    plt.figure(figsize=(12, 12))

    for index, members in enumerate(communities):
        nx.draw_networkx_nodes(G,
                               pos,
                               nodelist=list(members),
                               node_color=colors[index % len(colors)])
    nx.draw_networkx_edges(G, pos, width=0.5)

    # labels
    nx.draw_networkx_labels(G, pos, font_size=10, font_family='sans-serif')

    plt.axis('off')
    target = ("./communities/unweighted_" + "comm_" + "w" + str(iw) + "k" +
              str(k_clique) + ".png")
    plt.savefig(target)  # save as png
    plt.close()
def test_isolated_K5():
    """Two disjoint 5-cliques are each their own community for k=5."""
    first = range(0, 5)
    second = range(5, 10)
    G = nx.Graph()
    for clique_nodes in (first, second):
        G.add_edges_from(combinations(clique_nodes, 2))  # add a five clique
    found = list(nx.k_clique_communities(G, 5))
    expected = set([frozenset([0, 1, 2, 3, 4]), frozenset([5, 6, 7, 8, 9])])
    assert_equal(set(found), expected)
def graph_communities(self):
    """Map each node of ``self.G`` to a 1-based 6-clique community index.

    A node in several communities keeps the index of the last one.
    """
    return {
        member: position + 1
        for position, group in enumerate(nx.k_clique_communities(self.G, 6))
        for member in group
    }
def kcliques_to_html(G):
    """Store a per-community colour value in each node's ``kclique`` attribute.

    One value is drawn per 2-clique community as a random integer in
    ``[0, 1000000]`` multiplied by the community size, and written to every
    member node. Nodes outside all communities are left untouched.
    """
    kcliques = list(networkx.k_clique_communities(G, 2))
    kcliques_colors = [random.randint(0, 1000000) * len(l) for l in kcliques]
    # Pair each community with its colour directly instead of searching the
    # list with index() for every community.
    for clique, color in zip(kcliques, kcliques_colors):
        for node in clique:
            G.node[node]['kclique'] = color
def clique(gr):
    """Run the majority vote over the first 2-clique community of ``gr``.

    The graph is loaded with ``g_load``; the members of the first community
    yielded by the percolation are passed, together with the graph, to
    ``majority_vote.list_majority``, whose result is returned.
    """
    graph = g_load(gr)
    communities = list(networkx.k_clique_communities(graph, 2))
    first_members = list(communities[0])
    return majority_vote.list_majority(first_members, graph)
def getCliques(self, G='default', nclique=4):
    """Return the ``nclique``-clique communities of a graph as a list.

    Args:
        G: Graph to analyse; the string ``'default'`` selects ``self.G``.
        nclique: ``k`` for the clique percolation.
    """
    graph = self.G if G == 'default' else G
    return list(nx.k_clique_communities(graph, nclique))
def get_coalesced_communities(g, no_overlap=False):
    """Detect k-clique communities at two scales and coalesce them.

    Communities found with ``k`` equal to the truncated average clique size
    are coalesced, extended with the coalesced 3-clique communities that
    have at most ten members, coalesced again, and finally filtered to
    those with more than one member.

    Args:
        g: Weighted graph; edge ``weight`` is read when resolving overlaps.
        no_overlap: When true, every node that appears in several
            communities is kept only in the one it is most strongly
            connected to (by summed edge weight).

    Returns:
        A list of ``Community`` objects.
    """
    average_clique_size = int(get_average_clique_size(g))

    # Real lists (not map/filter results) so they can be concatenated and
    # iterated more than once on both Python 2 and Python 3.
    communities = [
        Community(c) for c in nx.k_clique_communities(g, average_clique_size)
    ]
    communities = coalesce_communities(communities, .7)

    communities2 = [Community(c) for c in nx.k_clique_communities(g, 3)]
    communities2 = coalesce_communities(communities2, .7)

    communities = list(communities) + [
        c for c in communities2 if len(c.members) <= 10
    ]
    communities = coalesce_communities(communities, .7)
    communities = [c for c in communities if len(c.members) > 1]

    if not no_overlap:
        return communities

    # Collect the nodes that occur in more than one community.
    members = set()
    overlapping_subs = set()
    for c in communities:
        for s in c.members:
            if s in members:
                overlapping_subs.add(s)
            else:
                members.add(s)

    for overlapped in overlapping_subs:
        overlapped_comms = [c for c in communities if overlapped in c.members]
        best_weight = 0
        best_comm = None
        for community in overlapped_comms:
            weight = 0
            for target in community.members:
                if g.has_edge(overlapped, target):
                    weight += g[overlapped][target]['weight']
            # The first candidate is always accepted so that a node whose
            # weights are all zero still ends up in exactly one community.
            if best_comm is None or weight > best_weight:
                best_weight = weight
                best_comm = community
        for c in overlapped_comms:
            c.members.remove(overlapped)
        best_comm.members.add(overlapped)

    return communities
def nxCommunity():
    """Print the 5-clique communities of a caveman graph and display it."""
    import networkx as nx
    import matplotlib.pyplot as plt

    caveman = nx.connected_caveman_graph(6, 4)
    communities = list(nx.k_clique_communities(caveman, 5))
    print(communities)

    nx.draw(caveman)
    plt.show()
def main():
    """Load the LastFm network and report on its 5-clique communities."""
    # read the data from file and build the network
    artists, net = ma.read_data()
    listening = ma.get_node_listening(artists)
    print_vectors(artists)

    method = 'k-clique'
    found = list(nx.k_clique_communities(net, 5))
    community_statistics(found, method)
    community_listening(found, listening, artists, method)
def report_communities(graph):
    """Draw ``graph`` on a circle, coloured by 3-clique community.

    Nodes outside every community get colour value 0; a node in several
    communities gets the 1-based index of the last one. The picture is
    saved as ``communities.png``.
    """
    membership = {}
    for cid, community in enumerate(
            networkx.k_clique_communities(graph, 3)):
        for node in community:
            membership[node] = cid + 1

    layout = networkx.circular_layout(graph)
    node_labels = {v: str(v) for v in graph}
    node_colors = [membership.get(v, 0) for v in graph]
    networkx.draw(graph,
                  layout,
                  font_size=8,
                  labels=node_labels,
                  cmap=plt.get_cmap("rainbow"),
                  node_color=node_colors)
    plt.savefig("communities.png")
def find_k_cliques(self, k = 3):
    """Collect the k-clique communities of ``self.G`` and group them.

    Each community is appended to ``self.k_cliques`` as a set, and
    ``self.k_clique_groups`` is recomputed from the whole list.

    Returns:
        The number of k-clique groups.
    """
    self.k_cliques.extend(
        set(kc) for kc in nx.k_clique_communities(self.G, k))
    self.k_clique_groups = self.find_common_subsets(self.k_cliques)

    if self.verbose:
        print('K-CLIQUES')
        print('Found %s k-cliques' % len(self.k_cliques))
        print('Found %s k-clique groups' % len(self.k_clique_groups))

    return len(self.k_clique_groups)
def test_zachary():
    """Compare k-clique communities of the karate club graph, k = 2..6."""
    club = nx.karate_club_graph()

    # clique percolation with k=2 is just connected components
    truth_k2 = {frozenset(club.nodes())}
    truth_k3 = {
        frozenset([0, 1, 2, 3, 7, 8, 12, 13, 14, 15, 17, 18, 19, 20, 21,
                   22, 23, 26, 27, 28, 29, 30, 31, 32, 33]),
        frozenset([0, 4, 5, 6, 10, 16]),
        frozenset([24, 25, 31]),
    }
    truth_k4 = {
        frozenset([0, 1, 2, 3, 7, 13]),
        frozenset([8, 32, 30, 33]),
        frozenset([32, 33, 29, 23]),
    }
    truth_k5 = {frozenset([0, 1, 2, 3, 7, 13])}
    truth_k6 = set()

    truths = (truth_k2, truth_k3, truth_k4, truth_k5, truth_k6)
    for k, truth in enumerate(truths, 2):
        assert set(k_clique_communities(club, k)) == truth
def spatial_major(in_folder, cliqueThresh):
    """Print the top-level clique communities of every network in a folder.

    Each file in ``in_folder`` (except ``.DS_Store``) is loaded with
    ``getJsonNet`` and converted to an undirected graph. When the size of
    its largest clique exceeds ``cliqueThresh``, the file name and the
    communities found with ``k`` equal to that size are printed.

    Args:
        in_folder: Directory containing the network files.
        cliqueThresh: Minimum largest-clique size (exclusive) to report.
    """
    for file_name in os.listdir(in_folder):
        if file_name == '.DS_Store':  # macOS folder metadata
            continue
        # os.path.join also works when in_folder has no trailing separator.
        path = os.path.join(in_folder, file_name)
        G = nx.Graph(getJsonNet(path))  # can't do cliques on directed nets
        if G.order() == 0:
            continue
        n = nx.graph_clique_number(G)
        if n > cliqueThresh:
            print(file_name)
            for c in nx.k_clique_communities(G, n):
                print(list(c))
def k_clique_CD(graph, cut_str, k_range):
    """Run k-clique community detection for each k and serialize the result.

    For every ``k`` in ``k_range`` the communities of ``graph`` are written
    with ``serialize_communities`` to a file under
    ``OUTPUT_DIRECTORY_KCLIQUE`` whose name contains ``k`` and ``cut_str``.
    """
    print('########\tK-CLIQUE CD WITH K RANGE = ' + str(k_range) +
          '\t########')
    for k in k_range:
        tag = str(k)
        print('\n########\t' + tag + '-CLIQUE CD ' + cut_str +
              ' START\t########')
        # convert every community into a list
        found = [list(members)
                 for members in nx.k_clique_communities(graph, k)]
        print('########\t' + tag + '-CLIQUE CD ' + cut_str +
              ' COMPLETE\t########')
        output_file = (OUTPUT_DIRECTORY_KCLIQUE + "/kclique_actor_" + tag +
                       "_" + cut_str + ".txt")
        print('> numero di community trovate: ' + str(len(found)))
        serialize_communities(found, output_file)
def make_modules_multik(graph, k=None):
    """Annotate ``graph`` with k-clique community indices for several k.

    For every ``k`` value, each node of a community gets the attribute
    ``'k_<k>'`` set to the community's 0-based index (the last community
    wins for nodes in several).

    Args:
        graph: Graph to analyse; annotated in place.
        k: Iterable of clique sizes; defaults to ``[2, 3, 4, 5, 6]``.

    Returns:
        The graph and a dict mapping each ``k`` to its communities, each
        given as a list of nodes.
    """
    if k is None:
        k = [2, 3, 4, 5, 6]
    communities = dict()
    for k_val in list(k):
        cliques = [
            list(members) for members in nx.k_clique_communities(graph, k_val)
        ]
        communities[k_val] = cliques
        for i, clique in enumerate(cliques):
            for node in clique:
                graph.node[node]['k_' + str(k_val)] = i
    # items() exists on both Python 2 and 3, unlike iteritems(); distinct
    # loop names avoid shadowing the ``k`` parameter.
    return graph, {size: list(found) for size, found in communities.items()}
def k_clique_analysis(G, k_list, out_path):
    """Write the k-clique communities of ``G`` to one file per ``k``.

    For each ``k`` the file ``<out_path><k>_clique.dat`` gets one line per
    community: its 0-based index, a tab, and the quoted node names inside
    square brackets, each followed by a comma.

    Args:
        G: Graph to analyse.
        k_list: Iterable of clique sizes.
        out_path: Prefix of the output file names.
    """
    for k in k_list:
        # The debugging print/quit() that used to stop the program here has
        # been removed so that the files are actually written.
        communities = [
            list(members) for members in nx.k_clique_communities(G, k)
        ]
        with open(out_path + str(k) + "_clique.dat", "w") as out:
            for index, community in enumerate(communities):
                out.write("%d\t[" % index)
                for node in community:
                    out.write('"%s",' % node)
                out.write("]\n")
def main():
    """Report statistics for k-clique and DEMON communities of the network."""
    # read the data from file and build the LastFm network
    artists, net = ma.read_data()
    listening = ma.get_node_listening(artists)
    genre_vectors, artist_vector = build_vectors(artists)

    kclique_label = 'k-clique'
    communities_kcliques = list(nx.k_clique_communities(net, 5))
    community_statistics(communities_kcliques, kclique_label)
    community_listening(communities_kcliques, listening, artists,
                        kclique_label, genre_vectors, artist_vector)

    demon_label = 'demon'
    communities_demon = read_demon()
    community_statistics(communities_demon, demon_label)
    community_listening(communities_demon, listening, artists, demon_label,
                        genre_vectors, artist_vector)
def findCommunites(threshold=0.5, sector=None, k=5, force=False): th = re.sub(r'([0-9]*)\.([0-9]*)',r'\1\2',str(threshold)) if sector != None: graphInFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_nx_"+sector+"_th"+th+".xml" graphOutFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx_"+sector+"_th"+th+"_k"+str(k)+".xml" outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx_"+sector+"_th"+th+"_k"+str(k)+".csv" else: graphInFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_graph_nx_th"+th+".xml" graphOutFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx"+"_th"+th+"_k"+str(k)+".xml" outFilename = PROCESSED_FILE_LOC + PREFIX + CRITERIA + "stock_communities_nx"+"_th"+th+"_k"+str(k)+".csv" print "reading graph from file: ", graphInFilename print "writing graph with community info to file: ", outFilename print "writing community details in csv format to file: ", outFilename if force or not isfile(graphOutFilename): g = nx.read_graphml(graphInFilename) #freq = findFreqOfCliquesInGraph(g) #plotHistFromDict(freq) comm = nx.k_clique_communities(g, k) communities = [] for c in comm: communities.append(c) numCommunities = len(communities) print "number of communities found: ", numCommunities colors = range(numCommunities) i = 0 for c in communities: for v in c: g.node[v]['cluster'] = colors[i] + 1 i += 1 nx.write_graphml(g, graphOutFilename) import csv with open(outFilename, "wb") as f: writer = csv.writer(f, delimiter='|', quotechar="'", quoting=csv.QUOTE_MINIMAL) writer.writerow(["sector", "symbol", "name", "cluster"]) for v in g: writer.writerow([g.node[v]['sector'], g.node[v]['symbol'], g.node[v]['name'], g.node[v]['cluster']]) results = PROCESSED_FILE_LOC + "results.csv" with open(results, "a") as f1: f1.write(str(dt.datetime.today()) + "," + outFilename + "," + str(numCommunities) + "," + str(calculateModularity(graphOutFilename)) + "\n") drawGraph(graphOutFilename, "gt")
def predict_user(G, cliqueSize=5, tooLittleFriendsInCircleThreshold=10):
    """Return the sufficiently large k-clique communities of ``G`` as lists.

    Args:
        G: Graph to analyse.
        cliqueSize: ``k`` for the clique percolation.
        tooLittleFriendsInCircleThreshold: Minimum number of members a
            community needs to be kept.
    """
    return [
        list(members)
        for members in list(nx.k_clique_communities(G, cliqueSize))
        if len(members) >= tooLittleFriendsInCircleThreshold
    ]
def DetectionComNX(graphe, path):
    """Return the 3-clique communities of an edge-list file.

    The graph is read from ``path + graphe``. Node labels are converted to
    integers after removing every ``"n"`` character.

    Returns:
        Dict mapping the 0-based community index to its list of node ids.
    """
    import networkx as nx

    graph = nx.read_edgelist(path + graphe)
    communities = list(
        nx.k_clique_communities(graph, 3, nx.find_cliques(graph)))
    return {
        index: [int(label.replace("n", "")) for label in members]
        for index, members in enumerate(communities)
    }
def calculateModularity():
    """Plot the number of k-clique communities of ``G_gn`` for k = 2..21.

    Very slow on large graphs.

    Returns:
        The list of clique sizes and, for each size, its communities.
    """
    max_cliques = 20
    clique_sizes = list(range(2, max_cliques + 2))

    communities = []
    progress = tqdm(range(max_cliques),
                    desc='Running k-clique modularity algorithm')
    for offset in progress:
        communities.append(list(nx.k_clique_communities(G_gn, offset + 2)))
    community_growth = [len(found) for found in communities]

    plt.plot(clique_sizes, community_growth, '*')
    plt.xlabel('clique size (k)')
    plt.ylabel('Qty Clique Communities')
    plt.title(
        'Number of k-clique communities vs clique size via percolation method')
    plt.grid()
    plt.show()
    return clique_sizes, communities
def get_comunidades(grafico):
    """Find, draw and return the 3-clique communities of ``grafico``.

    The communities are computed on an undirected copy of the graph. A
    fresh graph in which every community is fully connected is drawn for
    inspection.

    Args:
        grafico: Graph to analyse.

    Returns:
        List of communities (sets of nodes).
    """
    # Call to_undirected() once; previously the bound method itself was
    # passed as the graph argument.
    no_dirigido = grafico.to_undirected()
    comunidades = list(
        nx.k_clique_communities(no_dirigido, 3,
                                nx.find_cliques(no_dirigido)))

    # Build a new graph just to visualise the communities.
    g = nx.Graph()
    for miembros in comunidades:
        g.add_nodes_from(miembros)
    for miembros in comunidades:
        g.add_edges_from(list(permutations(miembros, 2)))

    pos = nx.spring_layout(g)
    plt.axis('off')
    nx.draw_networkx(g, pos)
    plt.show()
    return comunidades
def make_modules(graph, k=3, prefix="module"):
    """Split the k-clique communities into disjoint modules and tag nodes.

    Communities are processed from last to first; each keeps only the nodes
    not already claimed, so a shared node ends up in the latest community
    that contains it. Each node gets the attribute ``prefix`` set to its
    module's index.

    Returns:
        The graph and a dict mapping ``'<prefix>_<index>'`` to its nodes.
    """
    communities = list(nx.k_clique_communities(graph, k))
    modules = dict()
    claimed = set()
    for index in range(len(communities) - 1, -1, -1):
        fresh = communities[index] - claimed
        claimed = claimed | fresh
        modules[prefix + "_" + str(index)] = fresh
        for node in fresh:
            graph.node[node][prefix] = index
    return graph, modules
def week4(): path = "D:\Dropbox\PhD\My Work\Algorithms\@Machine Learning\Lectures\Social Network Analysis\Week 4_Community Structure\wikipedia.gml" wiki = nx.read_gml(path) wiki = wiki.to_undirected() # cliques cid, cls = max(nx.node_clique_number(wiki).iteritems(), key=operator.itemgetter(1)) print 'clique', cid, ' size:', cls # k-cores kcs = nx.k_core(wiki) print 'k-core size:', len(kcs.node) # community cs = list(nx.k_clique_communities(wiki, 2)) ratio = (len(cs[0]) + 0.0) / len(wiki.node) print 'community ratio:', ratio
def get_statistics():
    """Find the most important papers in the collection and pickle them.

    Loads the network from ``network.pkl`` and stores, in
    ``statistics.pkl``, the five top-ranked nodes by betweenness, PageRank,
    HITS hub score (all on the undirected graph) and in-degree, plus the
    k-clique communities for the largest ``k`` in 21..2 that yields any.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(MAIN_FOLDER + "network.pkl", "rb") as f:
        network = pickle.load(f)

    # Build the undirected copy once instead of once per measure.
    undirected = network.to_undirected()

    def top_five(scores):
        """Return the five keys with the highest values."""
        ranked = sorted(scores.items(), key=operator.itemgetter(1),
                        reverse=True)
        return [x[0] for x in ranked[:5]]

    betweenness = top_five(networkx.betweenness_centrality(undirected))
    pagerank = top_five(networkx.pagerank(undirected))
    hits = top_five(networkx.hits(undirected)[0])
    # dict() accepts both a plain dict and a degree view.
    in_degree = top_five(dict(network.in_degree()))

    # Try the largest clique size first and stop at the first non-empty
    # result.
    community_dict = []
    for clique_size in range(21, 1, -1):
        community_dict = list(
            networkx.k_clique_communities(undirected, clique_size))
        if community_dict:
            break
    modules = [list(community) for community in community_dict]

    statistics = {'in_degree': in_degree, 'betweenness': betweenness,
                  'hits': hits, 'pagerank': pagerank, 'modules': modules}
    # "wa" is not a valid mode; pickle data is written in binary mode.
    with open(MAIN_FOLDER + "statistics.pkl", "wb") as f:
        pickle.dump(statistics, f)
def main():
    """Draw the 5-clique communities of ``g_`` into an image and save it.

    Every community with at least four members is drawn as small circles at
    its nodes' positions (from ``nodes_``) with intensity ``10 * index``.
    The image is saved as ``corr.png``.
    """
    g = build_graph(sys.argv[1])
    global nodes_, nrows_, ncols_
    cliques = nx.k_clique_communities(g_, 5)
    img = np.zeros(shape=(nrows_, ncols_), dtype=np.uint16)
    for index, members in enumerate(cliques):
        positions = [nodes_[member] for member in members]
        print(positions)
        if len(positions) >= 4:
            for point in positions:
                cv2.circle(img, point, 1, 10 * index)
    plt.imshow(img, cmap='gray', interpolation='none', aspect='auto')
    plt.colorbar()
    plt.savefig('corr.png')
def main():
    """Render the 5-clique communities of ``g_`` to ``corr.png``.

    Communities with fewer than four members are only printed; the others
    are drawn as radius-1 circles at the positions stored in ``nodes_``,
    using ``10 * index`` as colour value.
    """
    g = build_graph(sys.argv[1])
    global nodes_
    global nrows_, ncols_
    communities = nx.k_clique_communities(g_, 5)
    canvas_shape = (nrows_, ncols_)
    img = np.zeros(shape=canvas_shape, dtype=np.uint16)
    for i, community in enumerate(communities):
        pos = [nodes_[node] for node in community]
        print(pos)
        if len(pos) < 4:
            continue
        shade = 10 * i
        for p in pos:
            cv2.circle(img, p, 1, shade)
    plt.imshow(img, cmap='gray', interpolation='none', aspect='auto')
    plt.colorbar()
    plt.savefig('corr.png')
def clusterGraph(G):
    """Give every 10-clique community of ``G`` its own node colour.

    Colours are generated by stepping the hue with the golden ratio from a
    random start until an unused hex colour is found; the colour is stored
    in the ``color`` attribute of each member node.

    Returns:
        The same graph, annotated in place.
    """
    print("Clustering and Colorizing Graph")
    # The clique size is the positional parameter k; the function has no
    # COMMUNITY_SIZE keyword.
    c = list(nx.k_clique_communities(G, 10))

    goldenRatio = 0.618033988749895
    usedColors = list()
    for cluster in c:
        h = random.random()
        while True:
            h += goldenRatio
            h %= 1
            rgb = colorsys.hsv_to_rgb(h, 0.5, 0.95)
            color = "#{0:02x}{1:02x}{2:02x}".format(int(rgb[0] * 255),
                                                    int(rgb[1] * 255),
                                                    int(rgb[2] * 255))
            if color not in usedColors:
                break
        usedColors.append(color)
        for nodeID in cluster:
            print("Giving", nodeID, "Color", color)
            G.node[nodeID]['color'] = color

    print("Used Colors:", usedColors)
    return G
def printCommunities(graph):
    """Print sizes of the largest 3-clique communities and tag the nodes.

    Prints the sizes of the five biggest communities and the members of the
    biggest one, then stores each node's 1-based community id in its
    ``community_id`` attribute (the last community wins for shared nodes).
    """
    node_to_community = {}
    for index, members in enumerate(nx.k_clique_communities(graph, 3)):
        for member in members:
            node_to_community[member] = index + 1

    actors_by_community = {}
    for actor, cid in node_to_community.items():
        actors_by_community.setdefault(cid, []).append(actor)

    ranked = sorted(actors_by_community.items(),
                    key=lambda item: len(item[1]),
                    reverse=True)

    print("\nCommunities:")
    print("\t5 biggest communities size: ", end="")
    for _, actors in ranked[:5]:
        print(len(actors), end=",")

    print("\n\tMembers of biggest community:")
    for _, actors in ranked[:1]:
        print("\t{}".format(actors))

    for actor, cid in node_to_community.items():
        graph.node[actor]['community_id'] = cid
def get_ego_kclique_communities(ego): if ego in [5881, 12800]: print 'In get_ego_kclique_communities, skipping ego', ego return {} ego_kcc_dmp = join(DATA_DIR, 'cliques', 'kcc_%s.zip'%ego) if os.path.exists(ego_kcc_dmp): with zipfile.ZipFile(ego_kcc_dmp, mode='r') as zf: ccs = json.loads(zf.read('files1.json')) else: ego_cliques = get_ego_cliques(ego) print 'Processing k-clique communities: nx.find_cliques, ego:', ego G = load_ego_graph(ego) ccs = [list(cc) for cc in nx.k_clique_communities(G, 6, cliques=ego_cliques)] try: import zlib compression = zipfile.ZIP_DEFLATED except: compression = zipfile.ZIP_STORED json_rslt = json.dumps(ccs, ensure_ascii=False, indent=True) with zipfile.ZipFile(ego_kcc_dmp, mode='w') as zf: zf.writestr('files1.json', json_rslt, compress_type=compression) return ccs
def all_users_timeslots(self):
    """Compute per-slot graph statistics and export the coloured graphs.

    For each of the six ``all_posts_s<i>`` graphs, nodes get a ``color``
    attribute (0, or the 1-based index of the last 3-clique community that
    contains them), the graph is written to GraphML, and node/edge counts
    plus the mean degree, eigenvector, closeness (sampled) and betweenness
    (sampled) centralities are collected and printed.
    """
    stats = []
    for i in range(1, 7):
        G = self._graph_from_cursor('all_posts_s%d' % i)
        for nd in G.nodes():
            G.node[nd]['color'] = 0

        cl = []
        for j, clique in enumerate(nx.k_clique_communities(G, 3), 1):
            cl.append(clique)
            for nd in clique:
                G.node[nd]['color'] = j

        nodes = list(G.nodes())
        n = len(nodes)
        e = len(G.edges())
        d = pandas.Series(nx.degree_centrality(G)).mean()
        eg = pandas.Series(
            nx.eigenvector_centrality(G, max_iter=1000)).mean()

        # Sample sizes are capped at the node count so that graphs with
        # fewer than 100 (or 20) nodes do not raise ValueError.
        closeness = {}
        for node in random.sample(nodes, min(100, n)):
            closeness[node] = nx.closeness_centrality(G, node)
        c = pandas.Series(closeness).mean()
        b = pandas.Series(
            nx.betweenness_centrality(G, k=min(20, n))).mean()

        stats.append((n, e, d, eg, c, b, cl))
        nx.write_graphml(G, '../sna/all_posts_s%d.graphml'%i)

    print('nodes, edges, m. degree, m. eigenvector, m. closeness, m. betweeness')
    for s in stats:
        print('%s, %s, %s, %s, %s, %s' % (s[0], s[1], s[2], s[3], s[4], s[5]))
def calculateSimilarities(self):
    """Populate ``self.similarities`` from k-clique communities and pickle it.

    The graph ``self.yelpGraph`` is split into k-clique communities. For every
    user A in a community C, ``self.similarities[A]`` is set to the list of
    ``(other, 1)`` tuples for all other members of C. The resulting mapping is
    written to ``communitySim.p`` in the current working directory.

    Note: communities may overlap. A user that belongs to several of them
    keeps only the entries from the last community processed, because the
    user's list is reassigned each time.
    """
    # Despite its name, this value is passed as the clique size ``k`` of the
    # k-clique percolation, not as a lower bound on community size.
    min_community_size = 5

    print("Finding CommunitySimilarity! Yay!")
    communities = list(nx.k_clique_communities(self.yelpGraph, min_community_size))
    print("Number of test communities: %d" % len(communities))

    total_size = sum(len(c) for c in communities)
    print("Total size: %d" % total_size)

    count = 0
    for community in communities:
        print("We've entered a new community!")
        for user in community:
            count += 1
            # Every other member of the community is a friend with score 1.
            self.similarities[user] = [
                (friend, 1) for friend in community if friend != user
            ]

    print("The count is %d" % count)
    print("Number of pairs: %d .... should be equal to 2995" % len(self.similarities))

    # Write the similarity map to file; the context manager guarantees the
    # handle is flushed and closed (it was previously left open).
    with open("communitySim.p", "wb") as out_file:
        pickle.dump(self.similarities, out_file)
if not G.has_edge(terms[i], terms[j]): # add edge if it is not there already G.add_edge(terms[i], terms[j]) G.edge[terms[i]][terms[j]]['freq'] = 1 # the count is 1 else: G.edge[terms[i]][terms[j]][ 'freq'] += 1 # existing edge, increment the count f.close() #remove all edges with a freq less than 3 remove = [] for N1, N2 in G.edges(): # for each edge if G.edge[N1][N2]['freq'] < 3: remove.append((N1, N2)) # add it to the 'remove' list G.remove_edges_from(remove) # filter #find all maximal cliques cliques = list(nx.find_cliques(G)) sorted_cliques = sorted(cliques, key=len, reverse=True) # sort cliques by size print(sorted_cliques[0]) #find all k-cliques communities kcliques = list(nx.k_clique_communities(G, 3)) sorted_cliques = sorted(kcliques, key=len, reverse=True) # sort cliques by size print(sorted_cliques[0])
tupl = sorted_Pr[ii] print tupl #Hits and Authorities h, a = nx.hits(G_hybrid) number = 5 print "hubs nodes" sorted_h = sorted(h.items(), key=operator.itemgetter(1), reverse=True) for ii in range(number): tupl = sorted_h[ii] print tupl number = 5 print "authority nodes" sorted_a = sorted(a.items(), key=operator.itemgetter(1), reverse=True) for ii in range(number): tupl = sorted_a[ii] print tupl #Cliques ratioDenominator = 1000 smallestSize = nx.number_of_nodes(G_hybrid) / ratioDenominator communities = list(nx.k_clique_communities(G_hybrid, smallestSize)) for community in communities: print list(community)
for userId in list(submission['UserId']): # read graph filename = str(userId) + '.egonet' G = read_nodeadjlist(egonetFolderName + filename) # do not calculate for large graphs (it takes too long) if len(G.nodes()) > tooManyNodesThreshold: print('skipping user ' + str(userId)) continue else: print('predicting for user ' + str(userId)) # find comunities using k_clique_communities() listOfCircles = [] kCliqueComunities = list(nx.k_clique_communities(G,cliqueSize)) for community in kCliqueComunities: # leave only relativly large communities if len(community) >= tooLittleFriendsInCircleThreshold: listOfCircles.append(list(community)) # populate prediction string predictionString = '' for circle in listOfCircles: for node in circle: predictionString = predictionString + str(node) + ' ' predictionString = predictionString[:-1] + ';' predictionString = predictionString[:-1] # if no prediction was created, use 'all friends in one circle' if len(listOfCircles) > 0:
for c in communities: if name in c: nodes.append({ 'name':name, 'group':g }) g = g + 1 return nodes G = nx.Graph() topAuthorNames = [a[0] for a in authorsCounter.most_common(50)] # top 40 authors G.add_nodes_from(topAuthorNames) # assign nodes, edges are assigned in cooccurrence_links() collaborators = collaborators_matrix(authors) cooccurrences = {'nodes':[], 'links':[]} cooccurrences['links'] = cooccurrence_links(authorsCounter, collaborators) communities=list(nx.k_clique_communities(G,3)) # detect communities and than assign groups cooccurrences['nodes'] = cooccurrence_nodes(authorsCounter, 50, communities) # dest = '/Users/asif/Sites/scholars/neuromodulation/cooccurrences.json' # f = open(dest, 'w+') # f.write(json.dumps(cooccurrences)) # f.close() ########################################################### # Edge-bindings collaborators ########################################################### # Edge bindings def collaborators_bindings(groupCounters, collaborators): mostfreq = [x[0] for x in groupCounters.most_common(50)] collaborators_bindings = []
def calculate_k_clique(G, K):
    """Compute the K-clique communities of ``G`` and write them to a CSV file.

    The communities are passed to ``write_csv_groups`` together with the
    target path ``./data/results/kClique<K>.csv``. When the module-level
    ``verbose`` flag is truthy, a progress line is printed first.

    Args:
        G: the networkx graph to analyse.
        K: clique size handed to ``nx.k_clique_communities``.
    """
    groups = nx.k_clique_communities(G, K)
    size_label = str(K)
    if verbose:
        print("k-cliques " + size_label)
    target = './data/results/kClique' + size_label + '.csv'
    write_csv_groups(target, groups)
#g.remove_bridges("/home/kazem/weakc/out",0,",") g.export_G_to_csv(1) #diam_ = nx.algorithms.distance_measures.diameter(g.G) #print "diameter of G: %d" %diam_ if(0): g = WCG(100) edge_thr = 1800 #let's read the contact duration between pairs reader = csv.reader(open("/Users/kazemjahanbakhsh/Downloads/graph_cd.txt"), delimiter=',') for line in reader: if float(line[2]) > edge_thr: g.G.add_edge(int(line[0]),int(line[1]),weight=float(line[2])) #plot G #g.plot_graph(g.G) c = list(nx.k_clique_communities(g.G, 4)) print c if(0): g = WCG(100) g.build_csv("/home/kazem/data/FB/facebook-links.txt", 0, "\t") #g.findWhiskers("/home/kazem/weakc/out","/home/kazem/weakc/export.csv",0,",") #H = g.G.subgraph([60512, 60513, 60514, 60515, 60516, 60517, 60518, 60519, 60520, 60508, 60509, 60510, 60511]) #H = g.G.subgraph([63008, 63687, 54988, 54989, 54990, 54991, 55065, 55066, 63356, 63357, 63007]) #H = g.G.subgraph([54380, 54381, 54382, 54383, 54384, 54385, 54386, 54387, 54388, 35581]) #H = g.G.subgraph([62496, 60099, 60100, 45811, 45812, 45813, 50652, 61054, 61055]) #H = g.G.subgraph([48870, 48871, 48872, 49773, 49774, 50609, 50610, 50611, 50612]) H = g.G.subgraph([45958, 45959, 45960, 45961, 45962, 45963, 49551, 49082, 49083, 49084, 49085, 49086, 59970, 59971, 51115, 51116, 62238, 62239, 61477, 55530, 61143, 59418, 59419, 59583, 59055, 59052, 59053, 59054, 41127, 47426, 47427, 47428, 47429, 57631, 49120, 49121, 49122, 54528, 49119, 63321, 59490, 52851, 52852, 63483, 52888, 52889, 52890, 59523, 59522, 54816, 60288, 62493, 62437, 60287, 59792, 59793, 59794, 59790, 59791, 37891, 37892, 15773, 36262, 59569, 60724, 60614, 28727, 61408, 55076, 55075, 29972, 48870, 48871, 48872, 49773, 49774, 50609, 50610, 50611, 50612, 62496, 60099, 60100, 45811, 45812, 45813, 50652, 61054, 61055, 54380, 54381, 54382, 54383, 54384, 54385, 54386, 54387, 54388, 35581, 63008, 63687, 54988, 54989, 54990, 54991, 55065, 55066, 63356, 63357, 63007, 60512, 60513, 60514, 60515, 60516, 60517, 60518, 60519, 60520, 
60508, 60509, 60510, 60511, 57953, 57801, 57802, 57946, 53501, 53502, 53503]) if(0): g = WCG(2048) g.build_csv("/Users/kazemjahanbakhsh/Downloads/facebook-links.txt", 0, "\t")
def community(G, number):
    """Return the k-clique communities of ``G`` as a list.

    Args:
        G: the networkx graph to analyse.
        number: clique size ``k`` handed to ``nx.k_clique_communities``.

    Returns:
        A list holding every community yielded by
        ``nx.k_clique_communities(G, number)``.
    """
    return list(nx.k_clique_communities(G, number))