def get_thread_text(comments):
    """Group comments into threads and return all thread texts as one string.

    A thread is a connected component of the undirected graph whose edges
    link every comment's ``object_id`` to its ``parent_id``.

    Args:
        comments: DataFrame-like object with ``object_id``, ``parent_id``,
            ``points`` and ``comment_text`` columns.  The three numeric
            columns are cast to integers in place.

    Returns:
        The ASCII-only, HTML-unescaped comment texts of each thread joined by
        single spaces, with the threads themselves joined by single spaces.
    """
    # Normalise the id/score columns on the caller's frame (in place).
    comments.object_id = comments.object_id.astype(int)
    comments.parent_id = comments.parent_id.astype(int)
    comments.points = comments.points.astype(float).astype(int)

    # Every id that appears as a comment or as a parent becomes a node.
    reply_graph = snap.TUNGraph.New()
    for node_id in set(comments.object_id) | set(comments.parent_id):
        reply_graph.AddNode(node_id)
    for child_id, parent_id in comments[['object_id', 'parent_id']].values.tolist():
        reply_graph.AddEdge(child_id, parent_id)

    threads = snap.TCnComV()
    snap.GetSccs(reply_graph, threads)

    pieces = []
    for thread in threads:
        in_thread = comments[comments['object_id'].isin(thread)]
        # Cast to str first: the text column may contain floats.
        raw_texts = in_thread.comment_text.astype(str)
        cleaned = [
            htmlParser.unescape(
                text.decode('ascii', errors='replace').encode('ascii', 'ignore'))
            for text in raw_texts
        ]
        pieces.append(" ".join(cleaned))
    return " ".join(pieces)
def getStronglyConnectedComponents(Graph, node_to_g):
    """Map each graph node's label to the 1-based index of its SCC.

    Args:
        Graph: SNAP graph handed to ``snap.GetSccs``.
        node_to_g: mapping from SNAP node id to the label used as the key of
            the returned dictionary.

    Returns:
        dict of ``node_to_g[node id] -> component index``, where indices start
        at 1 and follow the order in which ``snap.GetSccs`` yields components.
    """
    scc_vector = snap.TCnComV()
    snap.GetSccs(Graph, scc_vector)

    membership = {}
    # Component membership is 1-indexed.
    for component_number, members in enumerate(scc_vector, 1):
        for node_id in members:
            membership[node_to_g[node_id]] = component_number
    return membership
def get_component_distribution(ei_graph):
    """Count strongly connected components by size.

    Args:
        ei_graph: wrapper object whose ``base()`` returns the SNAP graph.

    Returns:
        Counter mapping (component size -> number of components of that size).

    See https://snap.stanford.edu/snappy/doc/reference/GetSccs.html
    """
    scc_vector = snap.TCnComV()
    snap.GetSccs(ei_graph.base(), scc_vector)

    size_histogram = Counter()
    for component in scc_vector:
        size_histogram[component.Len()] += 1
    return size_histogram
def computeStronglyConnectedComponents(graph, outFile):
    """Compute the strongly connected components of *graph* and export them.

    The output file lists one node id per line; each component is followed by
    an empty line.

    Args:
        graph: SNAP graph handed to ``snap.GetSccs``.
        outFile: path of the text file to (over)write.
    """
    logger.info("Computing Strongly Connected Components")
    # The context manager closes (and therefore flushes) the file even if
    # the computation or a write fails; previously the handle was never closed.
    with open(outFile, 'w') as fw_cc:
        Components = snap.TCnComV()
        snap.GetSccs(graph, Components)
        for CnCom in Components:
            for item in CnCom:
                fw_cc.write(str(item) + "\n")
            fw_cc.write("\n")
    logger.info("Strongly Connected Components Computed!")
    logger.info("Strongly Connected Components Exported to " + outFile)
def processNetwork(Graph, id_to_groups):
    """Write a plain-text structural report about *Graph*.

    The report goes to ``../../data/fastinf_graph_noweights_features.txt`` and
    contains, in order: overall node/edge counts, the largest weakly connected
    component, every WCC with at least 10 nodes, the largest strongly
    connected component, every SCC with at least 10 nodes, and finally the
    clustering coefficient, triad count and modularity.

    Args:
        Graph: SNAP graph to describe.
        id_to_groups: mapping from node id to the group label printed next to
            the ids of the largest components.
    """
    with open("../../data/fastinf_graph_noweights_features.txt", "w+") as report:
        emit = report.write

        def dump_max_component(subgraph):
            # Counts, then the node ids, then the matching group labels.
            emit('Edges: %f ' % subgraph.GetEdges())
            emit('Nodes: %f \n' % subgraph.GetNodes())
            emit('Node List: ')
            for member in subgraph.Nodes():
                emit('%d, ' % member.GetId())
            emit('\n')
            for member in subgraph.Nodes():
                emit('%s, ' % id_to_groups[member.GetId()])

        def dump_large_components(component_vector, label_format):
            # Components with fewer than 10 nodes are left out of the report.
            for index, component in enumerate(component_vector):
                if component.Len() >= 10:
                    emit(label_format % index)
                    for member_id in component:
                        emit('%d, ' % member_id)

        emit("RELATED GROUPS GRAPH:\n")
        emit('Edges: %d\n' % Graph.GetEdges())
        emit('Nodes: %d\n\n' % Graph.GetNodes())

        largest_wcc = snap.GetMxWcc(Graph)
        emit("MAX WCC:\n")
        dump_max_component(largest_wcc)

        emit("\n\nALL WCCs:")
        weak_components = snap.TCnComV()
        snap.GetWccs(Graph, weak_components)
        dump_large_components(weak_components, '\nWcc%d: ')

        largest_scc = snap.GetMxScc(Graph)
        emit("\n\nMAX SCC:\n")
        dump_max_component(largest_scc)

        emit("\n\nALL SCCs:")
        strong_components = snap.TCnComV()
        snap.GetSccs(Graph, strong_components)
        dump_large_components(strong_components, '\nScc%d: ')

        emit('\n\nCLUSTERING AND COMMUNITIES:\n')
        emit('Clustering coefficient: %f\n' % snap.GetClustCf(Graph, -1))
        emit('Num Triads: %d\n' % snap.GetTriads(Graph, -1))

        # Modularity is evaluated over the vector of all node ids.
        every_node_id = snap.TIntV()
        for member in Graph.Nodes():
            every_node_id.Add(member.GetId())
        emit('Modularity: %f' % snap.GetModularity(Graph, every_node_id))
def sccs(self, returnNodes=True):
    """Return the strongly connected components, largest first.

    Each component is converted with ``SnapUtil.rawComponentToNodeSet``; when
    ``returnNodes`` is false only the ids are returned (fetching the nodes
    themselves adds overhead).

    Args:
        returnNodes: forwarded to ``SnapUtil.rawComponentToNodeSet``.

    Returns:
        List of converted components sorted by descending length.
    """
    raw_components = snap.TCnComV()
    snap.GetSccs(self.rawGraph, raw_components)
    converted = [
        SnapUtil.rawComponentToNodeSet(component, self, returnNodes)
        for component in raw_components
    ]
    return sorted(converted, key=len, reverse=True)
def label_nodes_SCCs(G):
    """Tag every node of *G* with the index of its strongly connected component.

    The graph is converted with ``networkx_to_snappy(G, True)``, its SCCs are
    computed with SNAP, and the component index (as a string) is stored in the
    ``"SCC"`` attribute of each node.

    Args:
        G: graph exposing ``nodes()`` and ``nodes[node]`` attribute access.

    Returns:
        The same graph object, modified in place.
    """
    directed = networkx_to_snappy(G, True)
    component_vector = snap.TCnComV()
    snap.GetSccs(directed, component_vector)

    # node id -> index of the component that contains it
    scc_index_of = {}
    for index, members in enumerate(component_vector):
        for node_id in members:
            scc_index_of[node_id] = index

    for node in G.nodes():
        G.nodes[node]["SCC"] = str(scc_index_of[node])
    return G
def build_chunk(self):
    """Link every pair of users who commented in the same thread.

    The old and new comment chunks are concatenated, their authors are
    registered, and the comment graph built by ``build_comment_graph`` is
    split into components (threads).  For each thread, an edge is added to
    ``self.usersGraph`` between every pair of distinct participating user ids
    that are not already connected.
    """
    comments = pd.concat([self.oldchunk, self.newchunk])
    self.register_users(comments.author.unique())
    commentsGraph = self.build_comment_graph(comments)
    commentThreads = snap.TCnComV()
    snap.GetSccs(commentsGraph, commentThreads)
    for commentThread in commentThreads:
        commentsInThread = comments[comments['object_id'].isin(
            commentThread)]
        # Distinct user ids only.  The original read a misspelled name
        # (`usersIdsInThread`) here, which raised NameError.
        userIdsInThread = {
            self.user_ids[un] for un in commentsInThread.author.values
        }
        for u1, u2 in combinations(userIdsInThread, 2):
            if not self.usersGraph.IsEdge(u1, u2):
                self.usersGraph.AddEdge(u1, u2)
def is_uniquely_connected(graph):
    """Report whether *graph* has exactly one component with more than one node.

    Strongly connected components are checked first.  Only when they do not
    contain exactly one non-trivial component is the same check repeated on
    the weakly connected components.

    Args:
        graph: SNAP graph handed to ``snap.GetSccs`` / ``snap.GetWccs``.

    Returns:
        True if exactly one non-trivial component was found, else False.
    """

    def has_single_nontrivial(components):
        # Exactly one component holds more than one node.
        return sum(1 for comp in components if comp.Len() > 1) == 1

    # First identify if there are strongly connected components in the graph
    s_components = snap.TCnComV()
    snap.GetSccs(graph, s_components)
    unique = has_single_nontrivial(s_components)

    # If there is a unique strongly connected component the weak search is
    # unnecessary; otherwise repeat the search on the weak components.
    # The original tested the helper function object here instead of its
    # result, so this fallback could never run.
    if not unique:
        w_components = snap.TCnComV()
        snap.GetWccs(graph, w_components)
        unique = has_single_nontrivial(w_components)
    return unique
def main():
    """Compute network features for every assignee graph and save them as JSON.

    Graphs are loaded from the (non-citation) networks folder.  For each graph
    that has at least one node, the features are stored in its ``metadata``
    and the metadata is dumped to ``<folder><company_name>.json``.  Empty
    graphs are reported and skipped.  The number of loaded graphs is printed
    at the end.
    """
    citation = False
    folder = '../data/citation_networks/' if citation else '../data/networks/'
    AssigneeGraphs = load_networks(folder)

    print("Generating features...")
    for AGraph in tqdm(AssigneeGraphs):
        # Calculate network features
        Graph = AGraph.Graph
        node_count = Graph.GetNodes()
        if node_count <= 0:
            print("0 nodes %s" % (AGraph.company_name,))
            continue

        edge_count = Graph.GetEdges()
        cc = snap.GetClustCf(Graph)
        scc_vector = snap.TCnComV()
        snap.GetSccs(Graph, scc_vector)
        num_sccs = len(scc_vector)
        largest_scc = snap.GetMxScc(Graph)
        max_scc_proportion = float(largest_scc.GetNodes()) / node_count
        avg_patents_per_inventor = (
            float(AGraph.metadata['number_of_patents']) / node_count)
        modularity = get_modularity(Graph)

        features = [
            ('node_count', node_count),
            ('edge_count', edge_count),
            ('clustering_cf', cc),
            ('num_sccs', num_sccs),
            ('max_scc_proportion', max_scc_proportion),
            ('avg_patents_per_inventor', avg_patents_per_inventor),
            ('modularity', modularity),
        ]
        # The record itself is not used afterwards; it is still constructed
        # exactly as before.
        net_stats = NetworkStats(**dict(features))

        for feature_name, feature_value in features:
            AGraph.metadata[feature_name] = feature_value

        with open(folder + AGraph.company_name + '.json', 'w') as fp:
            json.dump(AGraph.metadata, fp, sort_keys=True, indent=4)

    print(len(AssigneeGraphs))
def calc_net_stats(folder):
    """Compute a ``NetworkStats`` record for every non-empty network in *folder*.

    Args:
        folder: directory passed to ``load_networks``.

    Returns:
        List of ``NetworkStats``, one per graph that has at least one node.
    """
    # The original never used `folder` and iterated over a name that is not
    # bound in this function; load the graphs the same way `main` does.
    AssigneeGraphs = load_networks(folder)

    stats = []
    print("Loading features...")
    for AGraph in tqdm(AssigneeGraphs):
        # Calculate network features
        Graph = AGraph.Graph
        node_count = Graph.GetNodes()
        if node_count <= 0:
            # Empty graphs carry no statistics (and would divide by zero).
            continue
        edge_count = Graph.GetEdges()
        cc = snap.GetClustCf(Graph)
        Components = snap.TCnComV()
        snap.GetSccs(Graph, Components)
        num_sccs = len(Components)
        MxScc = snap.GetMxScc(Graph)
        max_scc_proportion = float(MxScc.GetNodes()) / node_count
        avg_patents_per_inventor = (
            float(AGraph.metadata['number_of_patents']) / node_count)
        modularity = get_modularity(Graph)
        stats.append(
            NetworkStats(node_count=node_count,
                         edge_count=edge_count,
                         clustering_cf=cc,
                         num_sccs=num_sccs,
                         max_scc_proportion=max_scc_proportion,
                         avg_patents_per_inventor=avg_patents_per_inventor,
                         modularity=modularity))
    return stats
# Build graph G1 from 'vertices_<arg>.txt' / 'edges_<arg>.txt' (where <arg> is
# sys.argv[1]) and derive a "connectedness" ratio from its SCC size
# distribution.  G1 itself is created before this fragment.
numVertices = 0
# One vertex id per line.
# NOTE(review): textFile is not closed anywhere in the visible code.
textFile = open('vertices_' + str(sys.argv[1]) + '.txt')
lines = textFile.readlines()
for line in lines:
    stripped_line = line.rstrip('\n')
    G1.AddNode(int(stripped_line))
    numVertices = numVertices + 1
# read in edges
# Each line is split on whitespace; the first two integers are the endpoints.
with open('edges_' + str(sys.argv[1]) + '.txt') as f:
    for line in f:
        int_list = [int(i) for i in line.split()]
        G1.AddEdge(int_list[0], int_list[1])
# Components is filled here but only used by the commented-out debug loop.
Components = snap.TCnComV()
snap.GetSccs(G1, Components)
#for CnCom in Components:
#    print "Size of component: %d" % CnCom.Len()
total = 0
ComponentDist = snap.TIntPrV()
snap.GetSccSzCnt(G1, ComponentDist)
# Per the debug print below, each pair is (size, number of components of that
# size), so `total` ends up as the number of strongly connected components.
for comp in ComponentDist:
    #print "Size: %d - Number of Components: %d" % (comp.GetVal1(), comp.GetVal2())
    total = total + comp.GetVal2()
# The `* 1.0` forces float division.
# NOTE(review): raises ZeroDivisionError when the vertex file is empty.
connectedness = total / (numVertices * numVertices * 1.0)
strValue = str.format("{0:.10f}", connectedness)
# Rebinds `f`; the writes to this file are outside the visible fragment.
f = open('connectedness_' + str(sys.argv[1]) + '.txt', 'w')
import snap # import os # Graph = snap.GenRndGnm(snap.PNGraph, 100, 1000) # print os.system("pwd") Graph = snap.LoadEdgeList(snap.PNGraph, "../bitcoin_computed/txedgeunique.txt", 0, 1) G_Nodes = Graph.GetNodes() G_Edges = Graph.GetEdges() print "Graph: Nodes %d, Edges %d" % (G_Nodes, G_Edges) SCComponents = snap.TCnComV() WCComponents = snap.TCnComV() snap.GetSccs(Graph, SCComponents) snap.GetWccs(Graph, WCComponents) MaxWCCNodes = WCComponents[0] MaxSCCNodes = SCComponents[0] # print type(MaxSccNodes) print MaxSCCNodes.Len() print MaxWCCNodes.Len() # Iterate over each edge and check for In, Out SCCHashmap = snap.TIntH() for node in MaxSCCNodes: SCCHashmap.AddKey(node) InOutHashmap = snap.TIntH() for node in MaxWCCNodes:
def get_strongly_connected_components_number(graph: snap.PNGraph) -> int:
    """Return the number of strongly connected components of *graph*.

    Args:
        graph: directed SNAP graph.

    Returns:
        How many components ``snap.GetSccs`` found.
    """
    components = snap.TCnComV()
    snap.GetSccs(graph, components)
    # The original computed the components but returned nothing.
    return len(components)
# NOTE(review): this fragment starts mid-script; the first two statements
# presumably belong to an enclosing loop over sample sizes `i` that is not
# visible here - confirm before re-indenting.
print "Approx. effective diameter in " + input_file + " with sampling ", i, " nodes: ", round(
    diameter[index], 3)
index = index + 1
# Mean and sample variance (divisor n - 1 = 2) of the three diameter values.
mean = float(sum(diameter) / 3.0)
variance = float((pow((diameter[0] - mean), 2) + pow(
    (diameter[1] - mean), 2) + pow((diameter[2] - mean), 2)) / 2.0)
print "Approx. effective diameter in " + input_file + " (mean and variance): ", round(
    mean, 3), ", ", round(variance, 3)
snap.PlotShortPathDistr(Graph1, "shortest_path_plot_" + input_file,
                        "Undirected graph - shortest path", 1000)
print "Shortest path distribution of " + input_file + " is in: diam.shortest_path_plot_" + input_file + ".png"
# Despite its name, `largest_component` holds ALL components; the loop below
# scans them for the largest size.
largest_component = snap.TCnComV()
snap.GetSccs(Graph1, largest_component)
largest = 0.0
for item in largest_component:
    if largest < item.Len():
        largest = item.Len()
print ""
# `final_nodes` is defined outside this fragment (presumably the node count).
print "Fraction of nodes in largest connected component in " + input_file + ": ", float(
    largest) / float(final_nodes)
snap.PlotSccDistr(Graph1, "conn_components_plot_" + input_file,
                  "Undirected graph - Connected components distribution")
print "Component size distribution of " + input_file + " is in: scc.conn_components_plot_" + input_file + ".png"
""" snap.DelDegKNodes(G0,1,0) snap.DelDegKNodes(G0,1,0) snap.DelDegKNodes(G0,1,0) snap.DelDegKNodes(G0,1,0) snap.PrintInfo(G0) DegToCntV = snap.TIntPrV() snap.GetOutDegCnt(G0, DegToCntV) for item in DegToCntV: print "%d nodes with out-degree %d" % (item.GetVal2(), item.GetVal1()) """ """ Components = snap.TCnComV() snap.GetSccs(G0, Components) for CnCom in Components: print "Size of component: %d" % CnCom.Len() """ """ DegToCntV = snap.TIntPrV() snap.GetInDegCnt(G0, DegToCntV) for item in DegToCntV: print "%d nodes with in-degree %d" % (item.GetVal2(), item.GetVal1()) """ """ for outDeg in range(25,3200): for inDeg in range (15,20): snap.DelDegKNodes(G0,outDeg,inDeg)
# Compute degree distribution and save it to an external textfile degree_vertex_count = snap.TIntPrV() s.GetOutDegCnt(u_rndm_graph, degree_vertex_count) file = open("graph_rdm_undirected_degree_distrib.txt", "w") file.write("#----------------------------------\n") file.write("# Degree Distribution \n") file.write("#----------------------------------\n") file.write("\n") for pairs in degree_vertex_count: file.write("vertex degree %d: nmbr vertices with such degree %d \n" % (pairs.GetVal1(), pairs.GetVal2())) file.close() # Compute the sizes of the connected component and save it to an external file Components = snap.TCnComV() snap.GetSccs(u_rndm_graph, Components) file_2 = open("graph_rdm_undirected_connected_compo_sizes.txt", "w") file_2.write("#----------------------------------\n") file_2.write("# Size of Connected Components \n") file_2.write("#----------------------------------\n") file_2.write("\n") file_2.write("Total number of different components = %d\n" % len(Components)) file_2.write("\n") i = 1 for idx, component in enumerate(Components): file_2.write("Size of component #%d : %d\n" % (idx, len(component))) file_2.close() # Output the average of the shortest paths, adding more edges to the graph if it's not connected average_shortest_paths = []
def strongConnectedComponent(clusterCommands, Graph, conn, cur):
    """Compute the SCCs of *Graph* and hand them to ``createTable``.

    Args:
        clusterCommands: forwarded unchanged to ``createTable``.
        Graph: SNAP graph handed to ``snap.GetSccs``.
        conn: forwarded unchanged to ``createTable``.
        cur: forwarded unchanged to ``createTable``.
    """
    scc_vector = snap.TCnComV()
    snap.GetSccs(Graph, scc_vector)
    createTable(clusterCommands, scc_vector, conn, cur)
# Simulation loop: B independent draws.  Each draw perturbs the exogenous
# utilities with logistic noise, generates a configuration-model network,
# builds the graph D and analyses its strongly connected components.
# (B, data, theta, DegSeqV, D_only and timing are defined outside this
# fragment.)
takeup_bounds = np.zeros((B,2))
U_exo = gen_exo(data, theta)
for b in range(B):
    # Progress output, flushed immediately.
    print(b)
    sys.stdout.flush()
    # One logistic shock per row of U_exo.
    eps = np.random.logistic(size=U_exo.shape[0])
    U_exo_eps = U_exo + eps
    A = snap.GenConfModel(DegSeqV)
    # The timed section covers gen_D, the SCC computation and compute_NE.
    start_time = time.time()
    D = gen_D(U_exo_eps, A, theta[1])
    components = snap.TCnComV()
    snap.GetSccs(D, components)
    component_lens = [C.Len() for C in components]
    # "Delta" is the size of the largest component of D.
    print('Delta = {}.'.format(max(component_lens)))
    # compute_NE is skipped entirely when D_only is set.
    NE_sets = compute_NE(D, components, A, U_exo_eps, theta[1]) if not D_only else []
    end_time = time.time()
    timing[b] = end_time - start_time
    # num_equil is the product of the per-component NE_sets lengths; it
    # collapses to 0 as soon as one component has none (always when D_only).
    num_equil = 1
    total_takeup = []
    for i,C in enumerate(components):
        num = len(NE_sets[i]) if not D_only else 0
        if num == 0:
            num_equil = 0
            break
        else:
            num_equil *= num
def numSccs(G):
    """Return how many strongly connected components *G* has.

    Args:
        G: SNAP graph handed to ``snap.GetSccs``.

    Returns:
        The length of the component vector filled by ``snap.GetSccs``.
    """
    component_vector = snap.TCnComV()
    snap.GetSccs(G, component_vector)
    component_count = len(component_vector)
    return component_count
node_map_SCC = { } # Dictionary mapping each node to the super node that represents in the SCC graph node_map_cmprss = { } # Dictionary mapping each node to the super node that represents it in the compressed graph sets_of_same_descendants = [ ] # List containing sets of nodes, where every node in that set has the same descendants all_descendants = collections.OrderedDict( ) # Dictionary that maps every node to a set of all its descendants ancestors = collections.OrderedDict( ) # Dictionary that maps every node to a set of all its ancestors to_combine = [ ] # List containing sets of nodes, where every node in that set has the same ancestors and descendants all_nodes = [] # List containing all nodes as Node data type rather than ints Components = snap.TCnComV() snap.GetSccs(graph, Components) for CnCom in Components: # If the size of the connected component is greater than 1, create an empty set. # Add all nodes from CnCom into the set, and delete all edges between the nodes in CnCom. if CnCom.Len() > 1: nodes = set() MxScc = snap.GetMxScc(graph) for EI in MxScc.Edges(): nodes.add(EI.GetSrcNId()) nodes.add(EI.GetDstNId()) graph.DelEdge(EI.GetSrcNId(), EI.GetDstNId()) # Create a new node that will represent all nodes from CnCom and # map the new node to all nodes in CnCom
# Exploratory script: prints node/edge counts and largest-SCC sizes for the
# graphs returned by generate_meaning_graph under different flag combinations.
# (G2 is built before this fragment.)
GW = snap.GetMxScc(G2)
print(GW.GetNodes())
print(GW.GetNodes(), "lolmero")
G4, id2, synset2, _, _, _ = generate_meaning_graph(True, False, True)
print(G4.GetNodes())
print(G4.GetEdges())
GW = snap.GetMxScc(G4)
print(GW.GetNodes())
print(GW.GetNodes(), "lolmerohyp")
# NOTE: `id` shadows the builtin of the same name from here on.
G, id, synset, _, _, _ = generate_meaning_graph(False, True, False)
print(G.GetNodes())
print(G.GetEdges())
Gs = snap.TCnComV()
snap.GetSccs(G, Gs)
# Print the sizes of at most the first 11 components.
count = 0
for G3 in Gs:
    print(G3.Len())
    count += 1
    if count > 10:
        break
print("lolpoly")
# Histogram of shortest-path lengths, indexed by length (50 slots).
paths = [0] * 50
count = 0
for edge in G.Edges():
    # Each endpoint of an edge in G is translated through `id` and then
    # `synset2` before the path is searched in G2.
    # NOTE(review): synset2 was returned together with G4, yet the search runs
    # on G2 - confirm that both share node ids.
    path = snap.GetShortPath(G2, synset2[id[edge.GetSrcNId()]],
                             synset2[id[edge.GetDstNId()]])
    # NOTE(review): a negative `path` would index from the end of `paths` and
    # a value >= 50 would raise IndexError - confirm the possible range.
    paths[path] += 1
    if path == 2:
# Connected-component demo on graph G (G and CnCom are created before this
# fragment).
# Fill CnCom via GetNodeWcc for node id 1 and print its length.
snap.GetNodeWcc(G, 1, CnCom)
print("CnCom.Len() = %d" % (CnCom.Len()))
# get nodes in weakly connected components
WCnComV = snap.TCnComV()
snap.GetWccs(G, WCnComV)
# Print every component's size followed by each of its members.
for i in range(0, WCnComV.Len()):
    print("WCnComV[%d].Len() = %d" % (i, WCnComV[i].Len()))
    for j in range(0, WCnComV[i].Len()):
        print("WCnComV[%d][%d] = %d" % (i, j, WCnComV[i][j]))
# get the size of the maximum weakly connected component
# (printed with "%.5f", i.e. as a floating-point value)
MxWccSz = snap.GetMxWccSz(G)
print("MxWccSz = %.5f" % (MxWccSz))
# get the graph with the largest weakly connected component
GMx = snap.GetMxWcc(G)
print("GMx: GetNodes() = %d, GetEdges() = %d" % (GMx.GetNodes(), GMx.GetEdges()))
# get strongly connected components
SCnComV = snap.TCnComV()
snap.GetSccs(G, SCnComV)
# Only the sizes are printed for the strongly connected components.
for i in range(0, SCnComV.Len()):
    print("SCnComV[%d].Len() = %d" % (i, SCnComV[i].Len()))
# get the graph representing the largest bi-connected component
GMxBi = snap.GetMxBiCon(G)
print("GMxBi: GetNodes() = %d, GetEdges() = %d" % (GMxBi.GetNodes(), GMxBi.GetEdges()))
    Edges, 1.0)
# NOTE(review): this fragment starts in the middle of a call (its opening is
# not visible) and presumably sits inside a loop that removes one edge per
# iteration - confirm the real indentation before editing.
# Prepare BetweenessList Of List
# Each entry is [first endpoint, second endpoint, value stored in Edges].
for edge in Edges:
    betweennessSubList = [edge.GetVal1(), edge.GetVal2(), Edges[edge]]
    betweennessList.append(betweennessSubList)
# Descending Order Sort Betweenness and take highest betweenness
betweennessList.sort(key=lambda x: x[-1], reverse=True)
# Remove the edge with highest betweenness
''' NOTE: ONLY FIRST ROW USE FOR DELETE EDGES( HIGHEST BETWEENESS )'''
GraphkarateclubMaintainForDeleteEdges.DelEdge(betweennessList[0][0],
                                              betweennessList[0][1])
'''Compute the modularity of the resultant graph'''
Components = snap.TCnComV()
# GetSccs For Components
snap.GetSccs(GraphkarateclubMaintainForDeleteEdges, Components)
# Components come from the graph with the edge removed, while the modularity
# call receives the other graph object, `Graphkarateclub`.
# NOTE(review): confirm that GetModularity accepts a component vector here.
Modularity = snap.GetModularity(Graphkarateclub, Components)
# Add Modularity Value Append in Global List ModularityList
ModularityList.append(Modularity)
''' community structure for which the graph has highest modularity'''
# Remember the component split that produced the best modularity so far.
if Modularity > CheckModularity:
    CheckModularity = Modularity
    CommunityList = Components
print "The modularity of the network is %f" % max(ModularityList)
'''Output the community structure for which the graph has highest modularity'''
for Cmty in CommunityList:
    print "Community: "
    for NI in Cmty:
        print NI