def compute(self):
    """Run Girvan-Newman on every ego network named in ``./test_egonets``.

    For each ``<id>.egonet`` entry an undirected SNAP graph is built from the
    ego node, the nodes in ``self.adj_list[<id>]`` and the edge pairs listed
    in ``./edges/<id>.egonet.edges`` (two whitespace-separated integer ids
    per line).  Each detected community is printed as one line of
    space-separated node ids.
    """
    for filename in os.listdir('./test_egonets'):
        index = filename.split('.egonet')[0]
        new_file = './edges/' + index + '.egonet.edges'

        G = snap.TUNGraph.New()
        G.AddNode(int(index))
        for node in self.adj_list[index]:
            G.AddNode(int(node))

        # A context manager closes the edge file even if parsing fails.
        with open(new_file) as edge_file:
            for line in edge_file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines instead of failing on int('').
                    continue
                src, dst = int(fields[0]), int(fields[1])
                if not G.IsEdge(dst, src):
                    G.AddEdge(src, dst)

        print('Computing for ' + index)
        CmtyV = snap.TCnComV()
        snap.CommunityGirvanNewman(G, CmtyV)
        for Cmty in CmtyV:
            print(' '.join(str(NI) for NI in Cmty))
        G.Clr()
def community_detection(G):
    """Detect Girvan-Newman communities of ``G`` and write them to disk.

    Two files are produced:

    * ``data/CGN_knn_20.csv`` - one line per community, node ids joined
      with ", ".
    * ``data/CGN_dict_knn_20.json`` - mapping of community index to its
      list of node ids.

    See the SNAP documentation for ``CommunityGirvanNewman`` for details.
    """
    # Only for large networks - I got 3000 node communities on a 9000 node graph...
    # modularity = snap.CommunityCNM(G, CmtyV)
    outfile = "data/CGN_knn_20.csv"
    dictfile = "data/CGN_dict_knn_20.json"

    # NOTE: the original also loaded data/toronto_knn_20.csv into a networkx
    # graph that was never used; that dead work has been dropped.

    CmtyV = snap.TCnComV()
    snap.CommunityGirvanNewman(G, CmtyV)

    communities = [[c for c in Cmty] for Cmty in CmtyV]
    comm_dict = dict(enumerate(communities))

    with open(outfile, "w") as f:
        for members in tqdm(communities):
            print(len(members))
            # Node ids are integers; str.join needs strings.
            f.write(", ".join(str(node_id) for node_id in members))
            f.write("\n")

    with open(dictfile, "w") as f:
        json.dump(comm_dict, f)
def detect_community(G, id_to_title, max_communities=100, max_members=10):
    """Write a sample of the Girvan-Newman communities of ``G`` to a text file.

    Args:
        G: SNAP graph handed to ``snap.CommunityGirvanNewman``.
        id_to_title: mapping from node id to a title; a missing id raises
            ``KeyError``.
        max_communities: at most this many communities are written.
        max_members: at most this many members are written per community.

    The report goes to
    ``./community_detection/assignment2_Nhom1_TuToanChien.txt``; each
    community is a ``Community <i>:`` header followed by
    ``<node id>\\t \\t<title>`` lines.
    """
    print('dectect community ....')
    CmtyV = snap.TCnComV()
    snap.CommunityGirvanNewman(G, CmtyV)

    out_path = './community_detection/assignment2_Nhom1_TuToanChien.txt'
    # The context manager closes the file even when a title lookup fails.
    with open(out_path, 'w') as f:
        for i, Cmty in enumerate(CmtyV):
            if i >= max_communities:
                break
            f.write('Community ' + str(i) + ': \n')
            for j, NI in enumerate(Cmty):
                if j >= max_members:
                    break
                f.write(str(NI) + '\t \t' + str(id_to_title[NI]) + '\n')
def run(self, data, seed=None):
    """Cluster ``data`` with SNAP's Girvan-Newman and record the outcome.

    Directed inputs are rejected with ``UnsupportedException``.  ``seed`` is
    accepted but ignored (a log line says so).  The result dictionary is
    passed to ``save_result``, stored on ``self.result`` and ``self`` is
    returned.
    """
    if data.is_directed():
        raise UnsupportedException("only undirected graph is supported")
    if seed is not None:
        self.logger.info("seed ignored")

    snap_graph = convert.to_snap(data)
    community_vec = snap.TCnComV()
    # utils.timeit's result is unpacked as (elapsed, value of the callable).
    timecost, modularity = utils.timeit(
        lambda: snap.CommunityGirvanNewman(snap_graph, community_vec))

    clusters = {
        cluster_id: [node_id for node_id in members]
        for cluster_id, members in enumerate(community_vec)
    }

    self.logger.info(
        "Made %d clusters in %f seconds. modularity of the graph is %f" %
        (len(clusters), timecost, modularity))

    result = {
        'timecost': timecost,
        'runname': self.name,
        'dataname': data.name,
        'meta': self.get_meta(),
        'modularity': modularity,
        'clusters': clusters,
    }
    save_result(result)
    self.result = result
    return self
def analizzaGirvanNewman(pfPaj, pfAINN, pfMod):
    """Label the authors of a Pajek graph with their Girvan-Newman community.

    Args:
        pfPaj: path of the graph in Pajek format.
        pfAINN: path of a tab-separated file with ``ID<TAB>number<TAB>name``
            per line; ``number`` is the node id used in the graph.
        pfMod: output path; receives ``ID<TAB>name<TAB>community`` lines
            terminated by CRLF.

    Returns:
        The number of communities found.

    Raises:
        KeyError: if an author number does not occur in any community.
    """
    g = snap.LoadPajek(snap.PUNGraph, pfPaj)
    comunita = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(g, comunita)

    dMod = {}  # {node number: community index}
    classe = 0
    for com in comunita:
        for nodo in com:
            dMod[nodo] = classe
        classe += 1
    print('Numero di comunita analizzaGirvanNewman: {} modularity: {}'.format(
        classe, modularity))

    # Both files are handled as bytes end to end: author names pass through
    # untouched whatever their encoding, and the code no longer mixes bytes
    # with str (which fails on Python 3).
    dNum = {}
    with open(pfAINN, 'rb') as fAINN:
        for line in fAINN:
            line = line.rstrip()
            if not line:
                continue  # skip blank lines instead of failing to unpack
            autID, autNum, autNome = line.split(b'\t')
            dNum[int(autNum)] = (autID, autNome)

    with open(pfMod, 'wb') as fMod:
        for autNum, (autID, autNome) in dNum.items():
            etichetta = str(dMod[autNum]).encode('ascii')
            fMod.write(b'\t'.join((autID, autNome, etichetta)) + b'\r\n')

    return classe  # number of communities found
def GirvanNewmanMethodBySnap(graph):
    """Print each Girvan-Newman community of ``graph`` and the modularity.

    Every community is printed as the list of its node ids, followed by one
    line reporting the modularity returned by SNAP.
    """
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(graph, CmtyV)
    for Cmty in CmtyV:
        # Print the members of this community; the original printed the
        # whole community vector object on every iteration.
        print("Community: ", list(Cmty))
    print("The modularity of the network is %f" % modularity)
def out_modularity_gn(g):
    """Print the Girvan-Newman communities of ``g`` and the modularity.

    Each community is introduced by a ``Community: `` line and followed by
    one node id per line.
    """
    community_vector = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(g, community_vector)
    # Single-argument print(...) calls give the same output on Python 2 and 3.
    for community in community_vector:
        print("Community: ")
        for i in community:
            print(i)
    print("The modularity of the network is %f" % modularity)
def community_gn(G):
    """Return the Girvan-Newman communities of ``G`` as lists of node ids."""
    detected = snap.TCnComV()
    snap.CommunityGirvanNewman(G, detected)
    return [[node_id for node_id in members] for members in detected]
def comDetect(algorithm, clusterCommands, Graph, conn, cur):
    """Run the selected community detection and store the result.

    Args:
        algorithm: ``"gn"`` (Girvan-Newman) or ``"cnm"``
            (Clauset-Newman-Moore).
        clusterCommands, conn, cur: forwarded unchanged to ``createTable``
            together with the detected communities.
        Graph: SNAP graph to analyse.

    Raises:
        ValueError: if ``algorithm`` is neither ``"gn"`` nor ``"cnm"``.
    """
    # Reject unknown algorithms up front; previously the table was written
    # with an empty result and the final print failed on an unbound name.
    if algorithm not in ("gn", "cnm"):
        raise ValueError(
            "unknown community detection algorithm: %r" % (algorithm,))

    CmtyV = snap.TCnComV()
    before_time = time.time()
    if algorithm == "gn":
        modularity = snap.CommunityGirvanNewman(Graph, CmtyV)
    else:
        modularity = snap.CommunityCNM(Graph, CmtyV)
    print("Total handling time is:  %s" % (time.time() - before_time))

    createTable(clusterCommands, CmtyV, conn, cur)
    print("The modularity of the network is %f" % modularity)
def findCommunity():
    """Predict circles for every user in the sample submission and save them.

    Reads ``sample_submission.csv`` from ``submissionFolderName``; for each
    ``UserId`` it loads ``<UserId>.egonet`` from ``egonetFolderName``, skips
    graphs with more than ``tooManyNodesThreshold`` nodes, plots the graph,
    runs Girvan-Newman and keeps communities with at least
    ``tooLittleFriendsInCircleThreshold`` members.  The updated table is
    written to ``<submissionFolderName><submissionNumber>.csv``.
    """
    #%% make a submission
    submission = pd.read_csv(submissionFolderName + 'sample_submission.csv')
    #submission = pd.read_csv(submissionFolderName + 'train_ID.csv')

    for userId in list(submission['UserId']):
        # read graph
        filename = str(userId) + '.egonet'
        G = snap.TUNGraph.New()
        read_nodeadjlist(egonetFolderName + filename, G)

        # do not calculate for large graphs (it takes too long)
        if G.GetNodes() > tooManyNodesThreshold:
            print('skipping user ' + str(userId))
            continue
        print('predicting for user ' + str(userId))

        # visualization
        plot = plotting(G, snap.gvlNeato)
        plot.run('gviz_plot_{}'.format(userId),
                 title='UserID = {}'.format(userId))

        # find communities by using GirvanNewman
        CmtyV = snap.TCnComV()
        modularity = snap.CommunityGirvanNewman(G, CmtyV)
        # leave only relatively large communities
        listOfCircles = [
            list(Cmty) for Cmty in CmtyV
            if len(Cmty) >= tooLittleFriendsInCircleThreshold
        ]
        print('The modularity of the network is %f' % modularity)

        # populate prediction string: all kept members, space separated.
        # NOTE(review): circle boundaries are not encoded in this string;
        # confirm whether the consumer expects a separator between circles.
        predictionString = ' '.join(
            str(NI) for Cmty in listOfCircles for NI in Cmty)

        # if no prediction was created, keep the sample submission's value
        if listOfCircles:
            # .loc replaces the removed DataFrame.ix indexer.
            submission.loc[submission['UserId'] == userId,
                           'Predicted'] = predictionString

    submission.to_csv(submissionFolderName + str(submissionNumber) + '.csv',
                      index=False)
def calculate_communities(G):
    """Map every node of ``G`` to the list of its community labels.

    ``G`` is converted with ``networkx_to_snappy`` and clustered with
    Girvan-Newman.  Labels start at 2: the first community returned by SNAP
    is labelled 2, the next 3, and so on.
    """
    snap_graph = networkx_to_snappy(G)
    detected = snap.TCnComV()
    snap.CommunityGirvanNewman(snap_graph, detected)

    labels_by_node = {}  # {node: [community label, ...]}
    for label, members in enumerate(detected, 2):
        for node_id in members:
            labels_by_node.setdefault(node_id, []).append(label)
    return labels_by_node
def run(self):
    """Detect Girvan-Newman communities of ``self.graph``.

    Self-edges are deleted from ``self.graph`` first.  The result is stored
    in ``self.community_list`` as a list of lists of node ids.
    """
    snap.DelSelfEdges(self.graph)
    detected = snap.TCnComV()
    snap.CommunityGirvanNewman(self.graph, detected)
    self.community_list = [
        [node_id for node_id in members] for members in detected
    ]
def detectCommunities(self,
                      algo="CNM",
                      snapgraph=None,
                      H=None,
                      nxGraph=None,
                      write=True,
                      printout=True):
    '''
    Detect communities with the Clauset-Newman-Moore greedy modularity
    algorithm (``algo="CNM"``) or, for any other value of ``algo``, the
    Girvan-Newman betweenness-centrality algorithm.

    Args:
        algo: "CNM" selects Clauset-Newman-Moore; anything else selects
            Girvan-Newman.
        snapgraph: SNAP graph to analyse.  When None, ``snapgraph`` and
            ``H`` are both built by ``self.buildSnapGraph`` from ``nxGraph``.
        H: companion object exposing ``.Graph`` (the networkx graph) and
            ``.exportGraph``; required when ``snapgraph`` is supplied.
        nxGraph: networkx graph forwarded to ``self.buildSnapGraph``.
        write: when true, save the result via ``self.writeTxt``.
        printout: when true, print the communities, modularity and timing.

    Returns:
        dict mapping node id to ``{'CNM_Label': community number}``, with
        community numbers starting at 1.  The same mapping is attached to
        the networkx graph, which is then exported as JSON.

    Raises:
        ValueError: if ``snapgraph`` is supplied without ``H``.
    '''
    # Creates a SNAP Graph Object if none is provided
    if snapgraph is None:
        snapgraph, H = self.buildSnapGraph(networkxGraph=nxGraph)
    elif H is None:
        # Fail with a clear message instead of AttributeError on H.Graph.
        raise ValueError("H must be provided together with snapgraph")

    # Detect community and calculate modularity
    networkxGraph = H.Graph
    start = time.time()
    CmtyV = snap.TCnComV()
    if algo == "CNM":
        modularity = snap.CommunityCNM(snapgraph, CmtyV)
    else:
        modularity = snap.CommunityGirvanNewman(snapgraph, CmtyV)

    if printout:
        for i, Cmty in enumerate(CmtyV, 1):
            print(f"Community {i}: ")
            print(list(Cmty))
            print("\n")
        print("The modularity of the network is %f" % modularity)
        print(f"Time : {time.time()-start} seconds")

    # Save to text file at savepath if write = True
    if write:
        self.writeTxt(CmtyV, modularity)

    # Create a dataframe of community assignments.  The column keeps the
    # name 'CNM_Label' for either algorithm because it becomes the node
    # attribute name in the exported graph.
    communities = [(c, i) for i, cmty in enumerate(CmtyV, 1) for c in cmty]
    community_df = pd.DataFrame(communities,
                                columns=['hashtag_id', 'CNM_Label'])
    labels = community_df.set_index('hashtag_id').to_dict('index')

    # Export labelled graph as a JSON file, then report it.
    nx.set_node_attributes(networkxGraph, labels)
    H.exportGraph("JSON")
    print("Community labelled graph exported as ", self.name_arg, ".json")
    return labels
def labelCommunities(graph):
    """Return ``{node id: community label}`` from Girvan-Newman on ``graph``.

    Communities are numbered from 1 in the order SNAP returns them.  Members
    of single-node communities are labelled ``0.0`` instead; the numbering
    still advances past them.
    """
    detected = snap.TCnComV()
    snap.CommunityGirvanNewman(graph, detected)

    labels = {}
    for number, members in enumerate(detected, 1):
        for node_id in members:
            labels[node_id] = 0.0 if members.Len() == 1 else number
    return labels
def extract_community(topK = 5):
    """Collect the first ``topK`` Girvan-Newman communities of ``Graph``.

    ``Graph`` is read from the enclosing module scope.

    Return:
        (communities, arr) where ``communities`` is a ``defaultdict(int)``
        mapping node id to a label (``topK`` for the first community,
        counting down by one per community) and ``arr`` is the list of
        communities, each a list of node ids.
    """
    detected = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(Graph, detected)

    communities = defaultdict(int)  # {node id: community label}
    arr = []
    remaining = topK
    for members in detected:
        if remaining <= 0:
            break
        node_ids = [node_id for node_id in members]
        communities.update(dict.fromkeys(node_ids, remaining))
        arr.append(node_ids)
        remaining -= 1

    print("The modularity of the network is %f" % modularity)
    return communities, arr
def same_community(G, n1, n2, method="CNM"):
    """Tell whether ``n1`` and ``n2`` share a community without their edge.

    If the edge (n1, n2) exists it is removed for the duration of the
    community detection and always restored afterwards, even when the
    detection raises.

    Args:
        G: SNAP graph; temporarily mutated as described above.
        n1, n2: node ids.
        method: "CNM" (Clauset-Newman-Moore) or "GN" (Girvan-Newman).

    Returns:
        1 if both nodes fall into the same community, otherwise 0.

    Raises:
        ValueError: if ``method`` is neither "CNM" nor "GN".
    """
    if method == "CNM":
        detect = snap.CommunityCNM
    elif method == "GN":
        detect = snap.CommunityGirvanNewman
    else:
        # Previously an unknown method skipped detection and returned 0.
        raise ValueError("unknown method: %r" % (method,))

    deleted = G.IsEdge(n1, n2)
    if deleted:
        G.DelEdge(n1, n2)
    try:
        CmtyV = snap.TCnComV()
        detect(G, CmtyV)
        for cmty in CmtyV:
            # Test membership on a real set (the original built one but
            # then queried the SNAP object instead).
            members = set(cmty)
            if n1 in members and n2 in members:
                return 1
        return 0
    finally:
        if deleted:
            G.AddEdge(n1, n2)
def SnapGirvanNewman(G1):
    """
    Call the Stanford Snap method for Girvan-Newman Algorithm.
    This method only gives the final result, does not remember the process.
    The code is modified from Snap users manual.

    :param G1: (TUNGraph) a undirected graph
    :return: (list) list of lists (as communities) of node id
    """
    CmtyV = snap.TCnComV()
    snap.CommunityGirvanNewman(G1, CmtyV)
    # Copy into plain lists instead of handing out the TCnComV itself.
    # Fixes the undefined name ``CmtyV1`` and the missing return statement.
    return [[NI for NI in Cmty] for Cmty in CmtyV]
def modularity(graph, index, userId):
    """Write the large Girvan-Newman communities of ``graph`` to a CSV file.

    Communities with more than ``minCircleSize`` members are printed and
    numbered consecutively, continuing from ``index``.  One row per member
    (``Node``, ``Community``, ``Modularity``) is written to
    ``<write_DIR>modularity_<userId>.csv``.

    Args:
        graph: SNAP graph to analyse.
        index: last community number already in use.
        userId: used only to build the output file name.

    Returns:
        The last community number assigned (``index`` if none qualified).
    """
    cmtyV = snap.TCnComV()
    # Named mod_score so the local no longer shadows this function's name.
    mod_score = snap.CommunityGirvanNewman(graph, cmtyV)

    df = pd.DataFrame(columns=('Node', 'Community', 'Modularity'))
    for cmty in cmtyV:
        if cmty.Len() > minCircleSize:
            print(list(cmty))
            index += 1
            for NI in cmty:
                df.loc[NI] = [NI, index, mod_score]

    df.to_csv(write_DIR + 'modularity_{}.csv'.format(userId),
              sep=',',
              index=False)
    return index
def girvin_neuman_profile_extract(rowData, activityCodeList, index, week):
    """Cluster the activity-transition graph of one data row.

    ``generateTransition(activityCodeList)`` yields entries whose first item
    is a (source, destination) node pair and whose second item is a column
    label.  A directed edge is added for every label present in
    ``rowData.index`` with a value greater than 0; the graph is then
    converted to an undirected one and clustered with Girvan-Newman.

    Args:
        rowData: row supporting ``.index`` membership and label lookup.
        activityCodeList: forwarded to ``generateTransition``.
        index: identifier echoed back in the result.
        week: unused by the active code; kept for interface compatibility.

    Returns:
        ``[index, modularity, number of clusters, list of clusters]`` where
        each cluster is a list of node ids.
    """
    columnList = generateTransition(activityCodeList)
    G1 = snap.TNGraph.New()
    seen_nodes = set()  # set membership is O(1); the original scanned a list

    for (src, dst), label in ((entry[0], entry[1]) for entry in columnList):
        if label in rowData.index and rowData[label] > 0:
            for node in (src, dst):
                if node not in seen_nodes:
                    G1.AddNode(node)
                    seen_nodes.add(node)
            G1.AddEdge(src, dst)

    G1_undirect = snap.ConvertGraph(snap.PUNGraph, G1)
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(G1_undirect, CmtyV)

    clusterList = [[NI for NI in Cmty] for Cmty in CmtyV]
    noOfCluster = len(CmtyV)
    return [index, modularity, noOfCluster, clusterList]
import snap
import networkx as nx
import graphlab as gl
import pdb

# Girvan-Newman community detection on the edge list in "combined.txt"
# (source ids in column 1, destination ids in column 2).
# NOTE: a disabled networkx / community.best_partition experiment and a
# leftover pdb.set_trace() breakpoint, which stopped every run at startup,
# were removed.
g = snap.LoadEdgeList(snap.PUNGraph, "combined.txt", 1, 2)
Cmty = snap.TCnComV()
mod = snap.CommunityGirvanNewman(g, Cmty)

# One node id per line, communities back to back.
for i in Cmty:
    for j in i:
        print(j)
print("Modularity is  %s" % mod)
def get_modularity(Graph):
    """Return the modularity SNAP reports for Girvan-Newman on ``Graph``.

    Girvan-Newman is the community detection algorithm based on betweenness
    centrality.  The detected communities themselves are discarded.
    """
    scratch_communities = snap.TCnComV()
    return snap.CommunityGirvanNewman(Graph, scratch_communities)
def get_modularity(Graph):
    """Run Girvan-Newman on ``Graph`` and return only the modularity value."""
    discarded = snap.TCnComV()
    score = snap.CommunityGirvanNewman(Graph, discarded)
    return score
import snap

# Girvan-Newman community detection on the edge list in
# "facebook_combined.txt" (source ids in column 0, destination ids in
# column 1).
UGraph = snap.LoadEdgeList(snap.PUNGraph, "facebook_combined.txt", 0, 1)
CmtyV = snap.TCnComV()
modularity = snap.CommunityGirvanNewman(UGraph, CmtyV)

# Single-argument print(...) calls give the same output on Python 2 and 3.
for Cmty in CmtyV:
    print("Community: ")
    for NI in Cmty:
        print(NI)
print("The modularity of the network is %f" % modularity)
def compute_girvan_newman(self, graph):
    """Run Girvan-Newman on ``graph``.

    Returns:
        (modularity, communities) where ``communities`` is the populated
        ``snap.TCnComV`` vector.
    """
    detected = snap.TCnComV()
    score = snap.CommunityGirvanNewman(graph, detected)
    return score, detected
import sys

addedNodes = set()
G1 = snap.TUNGraph.New()

# Read the graph from the file named by the first command-line argument.
# Each non-blank line starts with "<source> <destination>"; any further
# columns (e.g. a weight) are ignored because the algorithm is unweighted.
with open(sys.argv[1]) as inputFile:
    for line in inputFile:
        words = line.split()
        if not words:
            continue
        source = int(words[0])
        destination = int(words[1])
        for node in (source, destination):
            if node not in addedNodes:
                G1.AddNode(node)
                addedNodes.add(node)
        G1.AddEdge(source, destination)

# Run the Girvan-Newman community detection algorithm. Note that this
# neglects weights.
CmtyV = snap.TCnComV()
modularity = snap.CommunityGirvanNewman(G1, CmtyV)
for Cmty in CmtyV:
    print("Community:")
    for NI in Cmty:
        print(NI)
# Find the node with the highest PageRank score.
for x in pagerank:
    if pagerank[x] > max_pagerank:
        max_pagerank = pagerank[x]
        max_pagerank_id = x
print("Maximum PageRank is %f. Node id: %d" % (max_pagerank, max_pagerank_id))

##################################################################
## Part 1 - Step 5 - GirvanNewman single execution time measure ##
##################################################################

# Measure the time needed for the execution of the GirvanNewman community
# detection algorithm based on betweenness centrality
start_time = time.time()
community_v = snap.TCnComV()
modularity = snap.CommunityGirvanNewman(graph, community_v)
print("GirvanNewman - Execution time required: %f seconds" %
      (time.time() - start_time))

# Clear community vector
community_v = None

##########################################################################
## Part 1 - Step 6 - Clauset-Newman-Moore single execution time measure ##
##########################################################################

# Measure the time needed for the execution of the Clauset-Newman-Moore
# community detection method.
from graph import *
import snap

# Detect Girvan-Newman communities of ``graph`` and write each one to its
# own file, cluster0.txt, cluster1.txt, ..., one node id per line.
clusters = snap.TCnComV()
mdlty = snap.CommunityGirvanNewman(graph, clusters)

# enumerate() supplies the file index; the original counter was never
# incremented, so every cluster overwrote cluster0.txt.
for i, clu in enumerate(clusters):
    with open("cluster" + str(i) + ".txt", "w") as cluster_file:
        for n in clu:
            cluster_file.write(str(n) + "\n")
def generate_graph(n_nodes=50, out_degree=None, seed=1):
    """
    Generate a preferential-attachment (Barabasi) graph and compute metrics:
        1) It finds the Node with the maximum Degree.
        2) It finds the Node with the maximum PageRank Score.
        3) It times two community detection algorithms on the graph:
            a) Girvan - Newman community Detection
            b) Clauset-Newman-Moore community Detection.

    :param n_nodes: int. Specifies the number of nodes for the graph to be created.
    :param out_degree: int. Specifies the out degree for each node. If None, a
        random integer between 5 and 20 is drawn after seeding the global
        ``random`` generator with ``seed``.
    :param seed: int. Seed used to make the random out degree reproducible.
    :return: Boolean. True when both community detections finished within 10
        minutes in total; False when they took longer or a MemoryError was
        raised, so a caller looping over growing graphs knows to stop.
    """
    if out_degree is None:
        random.seed(seed)
        out_degree = random.randint(5, 20)

    print('')
    print("Generating Graph with %s Nodes of Out Degree: %s " % (n_nodes, out_degree))

    # Generating a random graph based on the Barabasi Algorithm.
    barabasi_graph = snap.GenPrefAttach(n_nodes, out_degree)

    # Finding the node ID with the maximum Degree.
    maximum_degree_node = snap.GetMxDegNId(barabasi_graph)

    # Look the node up to report its degree; stop once it has been found.
    for NI in barabasi_graph.Nodes():
        if NI.GetId() == maximum_degree_node:
            print("Node: %d, Maximum Degree %d" % (NI.GetId(), NI.GetDeg()))
            break

    # Computing the PageRank score of every node in Graph.
    # Start id and score at -1; both are compared against the real values below.
    page_rank_id, page_rank_score = -1, -1
    PRankH = snap.TIntFltH()
    snap.GetPageRank(barabasi_graph, PRankH)

    # By iterating on each node we find the Node with the maximum PageRank Score.
    for node in PRankH:
        if PRankH[node] > page_rank_score:
            page_rank_score = PRankH[node]
            page_rank_id = node

    print('')
    print("Node with the Highest PageRank value: ")
    print("Node: %s, PageRank value %s " % (page_rank_id, page_rank_score))
    print('')

    try:
        # Timer for the first community detection algorithm.
        start_Girvan_Newman = time.time()
        CmtyV = snap.TCnComV()
        snap.CommunityGirvanNewman(barabasi_graph, CmtyV)
        print('Girvan-Newman community Detection Algorithm: Execution Time:  %s'
              % (time.time() - start_Girvan_Newman))

        # Timer for the second community detection algorithm.
        start_Clauset_Newman_Moore = time.time()
        CmtyV = snap.TCnComV()
        snap.CommunityCNM(barabasi_graph, CmtyV)
        print('Clauset-Newman-Moore community Detection Algorithm: Execution Time:  %s'
              % (time.time() - start_Clauset_Newman_Moore))

        print('-' * 100)
        print('-' * 100)

        # The 10-minute budget covers both algorithms: it is measured from
        # the start of the first one.
        if time.time() - start_Girvan_Newman > 10 * 60:
            return False
        return True
    except MemoryError:
        # Out of memory during community detection: report False so a
        # calling loop does not try even larger graphs.
        return False
def main():
    """Cluster the social network with Girvan-Newman and report statistics.

    Loads ``../data/nodes.csv`` and ``../data/edges.csv``, obtains the
    directed SNAP graph (from ``../data/youtube.graph`` when that file
    exists, otherwise built with ``data2dag``), reports self-loops, runs
    Girvan-Newman on the undirected conversion and prints the modularity of
    the resulting partition (computed on a networkx graph of the same
    edges), the number of clusters and the elapsed time.

    Raises:
        ValueError: if the detected communities do not partition the nodes.
    """
    # Load data
    nodes = pd.read_csv("../data/nodes.csv", sep='\t', index_col=0)
    n_nodes = np.asarray(nodes).shape[0]

    # Read the edge list once; it feeds both the SNAP fallback and networkx.
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)

    # Load social network accordingly
    if path.exists("../data/youtube.graph"):
        FIn = snap.TFIn("../data/youtube.graph")
        social_network = snap.TNGraph.Load(FIn)
    else:
        social_network = data2dag(edges, n_nodes)

    # Check for self edges (they are reported only, not removed).
    for e in social_network.Edges():
        if e.GetSrcNId() == e.GetDstNId():
            print("Self Loop Found:", e.GetSrcNId())

    # Girvan-Newman algorithm from snap.py
    print("Computing Girvan Newman")
    start = timeit.default_timer()
    CmtyV = snap.TCnComV()
    undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
    snap.CommunityGirvanNewman(undirected, CmtyV)
    stop = timeit.default_timer()

    cmtys = [[node for node in cmty] for cmty in CmtyV]

    G = nx.Graph()
    G.add_nodes_from(range(n_nodes))
    G.add_edges_from(map(tuple, edges))

    # Explicit check instead of assert, which is stripped under -O.
    if not is_partition(G, cmtys):
        raise ValueError("detected communities do not partition the graph")

    print("Calculating Modularity")
    modul = modularity(G, cmtys)

    print("Results from Girvan Newman:")
    print("Modularity:", modul)
    print("Number of clusters:", len(cmtys))
    print("Time elapsed:", stop - start)

    # NOTE: the original carried several disabled (string-quoted) blocks of
    # per-category analysis and plotting, the last one left unterminated;
    # they and the locals that only served them were removed.
def get_communities_girvan_newman(graph: snap.PUNGraph):
    """Return the Girvan-Newman communities of ``graph``.

    Returns:
        A list of communities, each a list of node ids.  The visible
        original computed the communities and discarded them.
    """
    CmtyV = snap.TCnComV()
    snap.CommunityGirvanNewman(graph, CmtyV)
    # NOTE(review): the return shape is inferred from the function name;
    # confirm against callers.
    return [[NI for NI in Cmty] for Cmty in CmtyV]