def computeGraphMetrics(GRep, GModel):
    """Compare a representative graph with a model graph.

    Returns a 7-tuple. The first four entries are ``euclidDist`` applied to
    the ``ndDist``, ``knnDist``, ``dkDist`` and ``ccDist`` outputs of the two
    graphs. The last three entries are ``(GRep value, GModel value)`` pairs:
    ``ASCoeff``, the node count of the largest weakly connected component,
    and ``snap.GetBfsEffDiam(graph, 1000, False)``.
    """
    def _gap(distribution):
        # Apply the same distribution helper to both graphs, GRep first.
        return euclidDist(distribution(GRep), distribution(GModel))

    nddDiff = _gap(ndDist)
    knnDiff = _gap(knnDist)
    dkDiff = _gap(dkDist)
    ccDiff = _gap(ccDist)

    pair = (GRep, GModel)
    ASVals = tuple(ASCoeff(g) for g in pair)
    MxWccVals = tuple(snap.GetMxWcc(g).GetNodes() for g in pair)
    effDVals = tuple(snap.GetBfsEffDiam(g, 1000, False) for g in pair)

    return nddDiff, knnDiff, dkDiff, ccDiff, ASVals, MxWccVals, effDVals
def q1_3_grpah(Graph):
    """Print approximate bow-tie region sizes of ``Graph``.

    Prints the total node count, the number of nodes outside the largest
    weakly connected component, the size of the largest strongly connected
    component (SCC), and estimates of OUT, IN and TENDRILS+TUBES.

    OUT and IN are estimated by running forward and backward BFS from up to
    100 randomly sampled SCC nodes and keeping the largest tree found.
    """
    n_nodes = Graph.GetNodes()
    MxWcc = snap.GetMxWcc(Graph)
    MxScc = snap.GetMxScc(Graph)
    n_MxWcc = MxWcc.GetNodes()
    n_MxScc = MxScc.GetNodes()
    print(" TOTAL : ", n_nodes)
    print(" DISCONNECTED : ", n_nodes - n_MxWcc)
    print(" SCC : ", n_MxScc)

    SCC_nodes = [NI.GetId() for NI in MxScc.Nodes()]

    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more nodes than the SCC contains.
    num_test = min(100, len(SCC_nodes))
    random_sampled_scc = random.sample(SCC_nodes, num_test)

    num_out = []
    num_in = []
    for NodeId in random_sampled_scc:
        BfsTreeOut = snap.GetBfsTree(Graph, NodeId, True, False)
        BfsTreeIn = snap.GetBfsTree(Graph, NodeId, False, True)
        num_out.append(BfsTreeOut.GetNodes())  # roughly SCC + OUT
        num_in.append(BfsTreeIn.GetNodes())  # roughly SCC + IN

    # With nothing sampled (empty SCC) fall back to the SCC size so that the
    # OUT and IN estimates come out as zero instead of raising.
    max_out = max(num_out) if num_out else n_MxScc
    max_in = max(num_in) if num_in else n_MxScc

    print(" OUT : ", max_out - n_MxScc)
    print(" IN : ", max_in - n_MxScc)
    num_tendrils = n_MxWcc - n_MxScc - (max_out - n_MxScc) - (max_in - n_MxScc)
    print(" TENDRILS+TUBES : ", num_tendrils)
def bowtie_components(graph, name): """Give sizes of DISCONNECTED, IN, OUT, SCC""" results = {} N = graph.GetNodes() SCC = snap.GetMxScc(graph) n = SCC.GetRndNId() disc = N - snap.GetMxWcc(graph).GetNodes() scc = SCC.GetNodes() SCC_in = snap.GetBfsTree(graph, n, False, True) SCC_out = snap.GetBfsTree(graph, n, True, False) in1 = SCC_in.GetNodes() - scc out = SCC_out.GetNodes() - scc tt = N - disc - scc - in1 - out results["a. SCC"] = scc results["b. IN"] = in1 results["c. OUT"] = out results["d. TENDRILS + TUBES"] = tt results["e. DISCONNECTED"] = disc print 'Total nodes in {} network: {}'.format(name, N) print 'DISCONNECTED: {}'.format(disc) print 'SCC: {}'.format(scc) print 'IN: {}'.format(in1) print 'OUT: {}'.format(out) print 'TENDRILS + TUBES: {}'.format(tt) return results
def SizeOfBowtieRegions(Graph, sccNodeID):
    '''
    Compute the size of each bow-tie region of Graph (structure described in
    http://snap.stanford.edu/class/cs224w-readings/broder00bowtie.pdf).

    sccNodeID must be a node of both the largest weakly connected component
    and the largest strongly connected component; this is asserted.

    returns: tuple of sizes (SCC, IN, OUT, TENDRILS, DISCONNECTED)
    '''
    largestWcc = snap.GetMxWcc(Graph)
    everyNode = Graph.GetNodes()
    assert largestWcc.IsNode(sccNodeID)
    inWcc = largestWcc.GetNodes()

    largestScc = snap.GetMxScc(Graph)
    # Sanity check the input.
    assert largestScc.IsNode(sccNodeID)
    inScc = largestScc.GetNodes()

    # Forward BFS reaches SCC + OUT, backward BFS reaches SCC + IN.
    forwardReach = snap.GetBfsTree(Graph, sccNodeID, True, False).GetNodes()
    backwardReach = snap.GetBfsTree(Graph, sccNodeID, False, True).GetNodes()

    inRegion = backwardReach - inScc
    outRegion = forwardReach - inScc
    tendrils = inWcc - (inRegion + outRegion + inScc)
    outsideWcc = everyNode - inWcc

    sizes = (inScc, inRegion, outRegion, tendrils, outsideWcc)
    assert sum(sizes) == Graph.GetNodes()
    return sizes
def initNetwork(self,Ajen,keyList): self.Ajen=Ajen self.keyList=keyList self.myGraph = snap.TNEANet.New() self.nid2id=dict() self.id2nid=dict() length=len(keyList) for i in range(length): theKey=keyList[i] nid=self.myGraph.AddNode(i) self.myGraph.AddStrAttrDatN(nid, theKey, 'key') self.nid2id[nid]=theKey self.id2nid[theKey]=nid self.outputList=[] for i in range(length): for j in range(i+1,length): if Ajen[i,j]>0: eid=self.myGraph.AddEdge(i, j) self.myGraph.AddFltAttrDatE(eid, Ajen[i,j], 'weigth') # eid=self.myGraph.AddEdge(j, i) # self.myGraph.AddFltAttrDatE(eid, Ajen[j,i], 'weigth') self.outputList.append([keyList[i],keyList[j], Ajen[i,j]]) print '-original: '+str(self.myGraph.GetEdges())+' '+str(self.myGraph.GetNodes()) self.MxWcc = snap.GetMxWcc(self.myGraph) print '-mxWcc: '+str(self.MxWcc.GetEdges())+' '+str(self.MxWcc.GetNodes())
def generate_steam_edge_list():
    """Write the edges of the largest WCC of the Steam graph as CSV.

    Loads ``graph/steam.graph``, keeps its largest weakly connected
    component and writes one row per edge to ``graph/steam_edge_list.csv``.
    Each row is ``[user_id, 'g' + other_id]``: the endpoint listed in
    ``graph/user_node.txt`` goes first, the other endpoint gets a 'g' prefix.
    If the source endpoint is not a listed user, the destination endpoint is
    written as the user.
    """
    FIn = snap.TFIn("graph/steam.graph")
    G = snap.TUNGraph.Load(FIn)
    G = snap.GetMxWcc(G)

    # A set gives O(1) membership tests; the previous list made every edge
    # pay a linear scan over all user ids.
    with open('graph/user_node.txt', 'r') as f:
        user_nodes = set(int(line) for line in f)

    # graph/game_node.txt used to be read here too, but its contents were
    # never used, so the read has been dropped.

    with open('graph/steam_edge_list.csv', 'w') as f:
        writer = csv.writer(f, delimiter=',')
        for edge in G.Edges():
            id1 = edge.GetSrcNId()
            id2 = edge.GetDstNId()
            if id1 in user_nodes:
                row = [str(id1), 'g' + str(id2)]
            else:
                row = [str(id2), 'g' + str(id1)]
            writer.writerow(row)
def preproc_graph(filename): ''' get connected graph I beleive this is done after we remap the nodes to consecutive order in map_nodes_new.py ''' print "Working on %s \n" % filename print "Generating graph from edge list..." # laod edge list into snap Graph0 = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1, '\t') # get edges V0 = Graph0.GetNodes() # delete zero degree nodes snap.DelZeroDegNodes(Graph0) print "Done generating graph!" # get max weakly connected component print "Generating connected graph..." Graph = snap.GetMxWcc(Graph0) V = Graph.GetNodes() E = Graph.GetEdges() print "Done generating graph with V = %i, E = %i!, V0 = %i" % (V, E, V0) # get nodes included in weakly connected graph (which could be # a proper subset of original set) # Find one edge in graph and find all connected nodes for EI in Graph.Edges(): conn_node = EI.GetSrcNId() # start with one edge break # only need one edge since connected CnCom = snap.TIntV() snap.GetNodeWcc(Graph, conn_node, CnCom) conn_node_ids = sort(array([node for node in CnCom])) return Graph, conn_node_ids, V, E, V0
def main(version): starttime = datetime.datetime.now() codePath = sys.path[0] s = codePath.split('\\') workPath = s[0] + '\\' + s[1] + '\\' + s[ 2] + '\\data\\flixster\\commondata\\' #f:\project\somproject filePath1 = workPath + 'finalSocial' + version + '.txt' # transfer node string to num 2131313 to 1 # use the index of list to represent the node totalNodeList = [] G1 = snap.TUNGraph.New() for line in open(filePath1): if line == '': break linkPair = line[:-1].split('\t') node1 = int(linkPair[0]) node2 = int(linkPair[1]) if node1 not in totalNodeList: totalNodeList.append(node1) if node2 not in totalNodeList: totalNodeList.append(node2) node1MapNum = totalNodeList.index(node1) node2MapNum = totalNodeList.index(node2) if not G1.IsNode(node1MapNum): G1.AddNode(node1MapNum) if not G1.IsNode(node2MapNum): G1.AddNode(node2MapNum) G1.AddEdge(node1MapNum, node2MapNum) print 'get the max connected component...' MxWcc = snap.GetMxWcc(G1) print 'the max connected component node num is %d ' % MxWcc.GetNodes() print MxWcc.GetEdges() # filePath2=workPath+'finalUserID.txt' # finalNodeList=[] # for line in open(filePath2): # if line=='': # break # nodeStr=line[:-1] # node=int(nodeStr) # nodeMapNum=totalNodeList.index(node) # if MxWcc.IsNode(nodeMapNum): # finalNodeList.append(node) # print 'the final user num is %d' %len(finalNodeList) FOut = snap.TFOut(workPath + 'finalSocial' + version + '.graph') MxWcc.Save(FOut) FOut.Flush() print 'finished' endtime = datetime.datetime.now() print 'passed time is %d s' % (endtime - starttime).seconds
def main(): starttime = datetime.datetime.now() codePath = sys.path[0] s = codePath.split('\\') workPath = s[0] + '\\' + s[1] + '\\' + s[ 2] + '\\data\\baidu\\' #f:\project\somproject # transfer node string to num 2131313 to 1 # use the index of list to represent the node print 'use social data to build the graph... ' filePath1 = workPath + 'commondata\\rawSocial.txt' totalNodeList = [] G1 = snap.TUNGraph.New() for line in open(filePath1): if line == '': break linkPair = line[:-1].split('\t') node1 = int(linkPair[0]) node2 = int(linkPair[1]) if node1 not in totalNodeList: totalNodeList.append(node1) if node2 not in totalNodeList: totalNodeList.append(node2) node1MapNum = totalNodeList.index(node1) node2MapNum = totalNodeList.index(node2) if not G1.IsNode(node1MapNum): G1.AddNode(node1MapNum) if not G1.IsNode(node2MapNum): G1.AddNode(node2MapNum) G1.AddEdge(node1MapNum, node2MapNum) print 'get the max connected component...' MxWcc = snap.GetMxWcc(G1) print 'the max connected component node num is %d ' % MxWcc.GetNodes() print 'get user id in the max connected component... ' writer2 = open(workPath + 'commondata\\coreUserID.txt', 'w') filePath2 = workPath + 'commondata\\rawCoreUserID.txt' coreUserList = [] for line in open(filePath2): if line == '': break nodeStr = line[:-1] node = int(nodeStr) nodeMapNum = totalNodeList.index(node) if MxWcc.IsNode(nodeMapNum): coreUserList.append(node) nodeLine = str(node) + '\n' writer2.write(nodeLine) writer2.close() print 'the core user num is %d' % len(coreUserList) print 'finished' endtime = datetime.datetime.now() print 'passed time is %d s' % (endtime - starttime).seconds
def createGraph(nodes, edges):
    """Build an undirected graph from ways and keep its largest WCC.

    ``edges`` maps a way id to its ordered list of node references; each
    consecutive pair whose endpoints are both in ``nodes`` becomes an edge.
    Node references are renumbered 0, 1, 2, ... in order of first use.

    Returns ``(G, idToOsmid)`` where ``idToOsmid`` maps every assigned graph
    id back to the original reference (including ids that fall outside the
    returned component).
    """
    G = snap.TUNGraph.New()
    renumbered = {}
    idToOsmid = {}

    def _graph_id(ref):
        # Register a reference on first sight; the next free id always
        # equals the number of references registered so far.
        if ref not in renumbered:
            fresh = len(renumbered)
            renumbered[ref] = fresh
            idToOsmid[fresh] = ref
            G.AddNode(fresh)
        return renumbered[ref]

    for osmid in edges:
        refs = edges[osmid]
        for pos in xrange(1, len(refs)):
            start = refs[pos - 1]
            end = refs[pos]
            # segments touching the boundary may reference nodes that are
            # not part of the graph; those are skipped
            if start in nodes and end in nodes:
                start_id = _graph_id(start)
                end_id = _graph_id(end)
                G.AddEdge(start_id, end_id)

    G = snap.GetMxWcc(G)
    return G, idToOsmid
def Q2_4():
    """Print path probabilities for the Epinions and Email networks.

    For each of ``TRIALS`` rounds and each network, prints
    ``ProbabilityOfPath`` for the whole network and then for its largest
    weakly connected component.
    """
    epinions, email = loadNetworks()
    labelled = (("Epinions", epinions), ("Email", email))
    for trial in xrange(TRIALS):
        for name, network in labelled:
            whole = ProbabilityOfPath(network)
            print("Probability of path for entire %s is %s." % (name, whole))
            largest = ProbabilityOfPath(snap.GetMxWcc(network))
            print("Probability of path in largest WCC of %s is %s." % (name, largest))
def __init__(self, nodes, edges, edge_list=None):
    """Set up the graph and empty bookkeeping state.

    With ``edge_list`` given, the graph is loaded from that edge-list file
    (columns 0 and 1). Otherwise a random undirected G(n, m) graph with
    ``nodes`` nodes and ``edges`` edges is generated and reduced to its
    largest weakly connected component.
    """
    if edge_list is not None:
        self.graph = snap.LoadEdgeList(snap.PUNGraph, edge_list, 0, 1)
    else:
        random_graph = snap.GenRndGnm(snap.PUNGraph, nodes, edges)
        self.graph = snap.GetMxWcc(random_graph)
    self.assignment = dict()
    self.max_type = None
    self.min_type = None
def quick_properties(graph, name, dic_path): """Get quick properties of the graph "name". dic_path is the path of the dict {players: id} """ n_edges = graph.GetEdges() n_nodes = graph.GetNodes() print("##########") print("Quick overview of {} Network".format(name)) print("##########") print("{} Nodes, {} Edges").format(n_nodes, n_edges) print("{} Self-edges ".format(snap.CntSelfEdges(graph))) print("{} Directed edges, {} Undirected edges".format( snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph))) print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph))) print("{} 0-out-degree nodes, {} 0-in-degree nodes".format( snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0))) node_in = graph.GetNI(snap.GetMxInDegNId(graph)) node_out = graph.GetNI(snap.GetMxOutDegNId(graph)) print("Maximum node in-degree: {}, maximum node out-degree: {}".format( node_in.GetDeg(), node_out.GetDeg())) print("###") components = snap.TCnComV() snap.GetWccs(graph, components) max_wcc = snap.GetMxWcc(graph) print "{} Weakly connected components".format(components.Len()) print "Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(), max_wcc.GetEdges()) prankH = snap.TIntFltH() snap.GetPageRank(graph, prankH) sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True) NIdHubH = snap.TIntFltH() NIdAuthH = snap.TIntFltH() snap.GetHits(graph, NIdHubH, NIdAuthH) sorted_NIdHubH = sorted(NIdHubH, key=lambda key: NIdHubH[key], reverse=True) sorted_NIdAuthH = sorted(NIdAuthH, key=lambda key: NIdAuthH[key], reverse=True) with open(dic_path, 'rb') as dic_id: mydict = pickle.load(dic_id) print("3 most central players by PageRank scores: {}, {}, {}".format( list(mydict.keys())[list(mydict.values()).index(sorted_prankH[0])], list(mydict.keys())[list(mydict.values()).index(sorted_prankH[1])], list(mydict.keys())[list(mydict.values()).index( sorted_prankH[2])])) print("Top 3 hubs: {}, {}, {}".format( list(mydict.keys())[list(mydict.values()).index( sorted_NIdHubH[0])], 
list(mydict.keys())[list(mydict.values()).index( sorted_NIdHubH[1])], list(mydict.keys())[list(mydict.values()).index( sorted_NIdHubH[2])])) print("Top 3 authorities: {}, {}, {}".format( list(mydict.keys())[list(mydict.values()).index( sorted_NIdAuthH[0])], list(mydict.keys())[list(mydict.values()).index( sorted_NIdAuthH[1])], list(mydict.keys())[list(mydict.values()).index( sorted_NIdAuthH[2])]))
def max_wcc_info(edges_file, key_file, valuefn): print '\nLoading edge list...' G = snap.LoadEdgeList(snap.PUNGraph, edges_file, 0, 1) MaxWCC = snap.GetMxWcc(G) print '\nBuilding legend...' legend, full_lines = build_legends(key_file, valuefn, '\t') return MaxWCC, legend, full_lines
def main(args):
    """Extract the largest WCC of the review graph and save it.

    Reads the undirected edge list at ``args.review`` and writes the edge
    list of its largest weakly connected component to
    ``args.review_maxwcc``.
    """
    source_path, target_path = args.review, args.review_maxwcc
    full_graph = snap.LoadEdgeList(snap.PUNGraph, source_path, 0, 1)
    largest_component = snap.GetMxWcc(full_graph)
    snap.SaveEdgeList(largest_component, target_path)
def get_connected_component(graph):
    """Return the largest connected component of ``graph``, renumbered.

    Directed graphs (``snap.PNGraph``) yield their largest strongly
    connected component, undirected graphs (``snap.PUNGraph``) their largest
    weakly connected component. The result is passed through
    ``snap.ConvertGraph(..., True)`` so node ids run from 0 to size-1.

    Raises NotAGraphError for any other argument type.
    """
    if isinstance(graph, snap.PNGraph):
        graph_type, largest_of = snap.PNGraph, snap.GetMxScc
    elif isinstance(graph, snap.PUNGraph):
        graph_type, largest_of = snap.PUNGraph, snap.GetMxWcc
    else:
        raise NotAGraphError(graph)

    component = largest_of(graph)
    # renumber the node numbers from 0 to the size-1
    return snap.ConvertGraph(graph_type, component, True)
def processNetwork(Graph, id_to_groups):
    """Write a text report about ``Graph`` to the fastinf features file.

    The report lists graph size, the largest WCC and SCC (sizes, node ids
    and the ``id_to_groups`` entry of each node), every WCC/SCC with at
    least 10 nodes, and clustering coefficient, triad count and modularity.
    """
    with open("../../data/fastinf_graph_noweights_features.txt", "w+") as f:
        emit = f.write

        def _largest_section(component, heading):
            # Size line, node id list, then the group of each node.
            emit(heading)
            emit('Edges: %f ' % component.GetEdges())
            emit('Nodes: %f \n' % component.GetNodes())
            emit('Node List: ')
            for node in component.Nodes():
                emit('%d, ' % node.GetId())
            emit('\n')
            for node in component.Nodes():
                emit('%s, ' % id_to_groups[node.GetId()])

        def _all_section(finder, heading, row_format):
            # One row per component; components under 10 nodes are omitted
            # but still consume an index.
            emit(heading)
            found = snap.TCnComV()
            finder(Graph, found)
            for index, members in enumerate(found):
                if members.Len() >= 10:
                    emit(row_format % index)
                    for nodeid in members:
                        emit('%d, ' % nodeid)

        emit("RELATED GROUPS GRAPH:\n")
        emit('Edges: %d\n' % Graph.GetEdges())
        emit('Nodes: %d\n\n' % Graph.GetNodes())

        _largest_section(snap.GetMxWcc(Graph), "MAX WCC:\n")
        _all_section(snap.GetWccs, "\n\nALL WCCs:", '\nWcc%d: ')

        _largest_section(snap.GetMxScc(Graph), "\n\nMAX SCC:\n")
        _all_section(snap.GetSccs, "\n\nALL SCCs:", '\nScc%d: ')

        emit('\n\nCLUSTERING AND COMMUNITIES:\n')
        emit('Clustering coefficient: %f\n' % snap.GetClustCf(Graph, -1))
        emit('Num Triads: %d\n' % snap.GetTriads(Graph, -1))
        every_node = snap.TIntV()
        for node in Graph.Nodes():
            every_node.Add(node.GetId())
        emit('Modularity: %f' % snap.GetModularity(Graph, every_node))
def enumerate_graphs(self, k):
    """Yield every weakly connected directed graph on nodes 0..k-1.

    Each of the k*(k-1) possible non-loop edges is switched on or off by one
    character of a "0"/"1" sequence. A graph is yielded only when its largest
    weakly connected component contains all k nodes.
    """
    for seq in itertools.product("01", repeat=k*(k-1)):
        g = snap.TNGraph.New()
        for i in range(k):
            g.AddNode(i)
        for i, e in enumerate(seq):
            if e == '1':
                # Position i encodes (source, slot). divmod keeps both
                # integers on Python 2 and 3; plain `/` would produce a
                # float node id under Python 3.
                start_node, end_node = divmod(i, k - 1)
                # Slots skip the source itself, so no self-loop is created.
                if end_node >= start_node:
                    end_node += 1
                g.AddEdge(start_node, end_node)
        if snap.GetMxWcc(g).GetNodes() == k:
            yield g
def q2_3_util(dataset_name): # G = load_graph("email") G = load_graph(dataset_name) MxWcc = snap.GetMxWcc(G) total_size = G.GetNodes() wcc_size = MxWcc.GetNodes() disconnected_size = total_size - wcc_size print 'Total size: ', total_size print 'WCC size: ', wcc_size print 'DISCONNECTED: ', disconnected_size Rnd = snap.TRnd(42) Rnd.Randomize() MxScc = snap.GetMxScc(G) scc_size = MxScc.GetNodes() number_of_trials = 1 scc_plus_out = 0 scc_plus_in = 0 out_size = 0 in_size = 0 tendrils_plus_tubes = 0 for i in xrange(number_of_trials): NId = MxScc.GetRndNId(Rnd) # print 'Random node id', NId outward_set = set() BfsTree = snap.GetBfsTree(G, NId, True, False) for EI in BfsTree.Edges(): outward_set.add(EI.GetDstNId()) scc_plus_out = max(scc_plus_out, len(outward_set)) out_size = max( out_size, scc_plus_out - scc_size) # inward_set = set() BfsTree = snap.GetBfsTree(G, NId, False, True) for EI in BfsTree.Edges(): inward_set.add(EI.GetDstNId()) scc_plus_in = max(scc_plus_in, len(inward_set)) in_size = max(in_size, scc_plus_in - scc_size) tendrils_plus_tubes = max(tendrils_plus_tubes, wcc_size - in_size - out_size) print 'IN: ', in_size print 'scc_size', scc_size print 'scc + out: ', scc_plus_out print 'OUT: ', out_size print 'scc + in: ', scc_plus_in print 'TENDRILS + TUBES', tendrils_plus_tubes print '------------------'
def calculateWccSimilarity(G, partition):
    """Compare the node set of ``G`` with ``partition[0]``.

    Returns ``(numNodes, Jaccard)``: the node count of G's largest weakly
    connected component, and the Jaccard ratio between the nodes of ``G``
    and ``partition[0]`` (shared members divided by the size of the union).
    """
    numNodes = snap.GetMxWcc(G).GetNodes()
    reference = partition[0]

    graph_ids = [NI.GetId() for NI in G.Nodes()]
    shared = sum(1 for node_id in graph_ids if node_id in reference)
    # Union = every graph node plus the reference members the graph lacks.
    missing = sum(1 for NId in reference if not G.IsNode(NId))
    union_size = len(graph_ids) + missing

    Jaccard = shared * 1.0 / union_size
    return numNodes, Jaccard
def main():
    """Print component, PageRank and HITS statistics for the Java network.

    Loads the StackOverflow Java edge list, then prints the number of weakly
    connected components, the edge and node counts of the largest one, and
    the top three nodes by PageRank, hub score and authority score.
    """
    network = snap.LoadEdgeList(
        snap.PNEANet, "/Users/qingyuan/CS224W/stackoverflow-Java.txt", 0, 1)

    Components = snap.TCnComV()
    snap.GetWccs(network, Components)
    print("The number of weakly connected components is %d" % Components.Len())

    MxWcc = snap.GetMxWcc(network)
    # Arguments follow the sentence: edges first, then nodes (they used to
    # be passed the other way round).
    print(
        "The number of edges is %d and the number of nodes is %d in the largest weakly connected component."
        % (MxWcc.GetEdges(), MxWcc.GetNodes()))

    def _print_first_three(scores):
        # `scores` is already sorted in descending order of value.
        for shown, item in enumerate(scores):
            if shown == 3:
                break
            print(item, scores[item])

    PRankH = snap.TIntFltH()
    snap.GetPageRank(network, PRankH)
    PRankH.SortByDat(False)
    print(
        "IDs of the top 3 most central nodes in the network by PagePank scores. "
    )
    _print_first_three(PRankH)

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(network, NIdHubH, NIdAuthH)

    NIdHubH.SortByDat(False)
    print("IDs of the top 3 hubs in the network by HITS scores. ")
    _print_first_three(NIdHubH)

    NIdAuthH.SortByDat(False)
    print("IDs of top 3 authorities in the network by HITS scores. ")
    _print_first_three(NIdAuthH)
def analyze(graph):
    """Compute summary statistics of ``graph``.

    Returns the tuple ``(avgClustCoeff, maxSCCsize, maxWCCsize, avgDegree,
    alphaMLLE, avgClustCoeffER, avgShortPath)``:

    * avgClustCoeff - clustering coefficient of the largest WCC
    * maxSCCsize / maxWCCsize - values of snap.GetMxSccSz / snap.GetMxWccSz
    * avgDegree - 2 * edges / nodes
    * alphaMLLE - power-law exponent estimate from the degree counts
    * avgClustCoeffER - clustering coefficient of a random G(n, m) graph
      with the same node and edge counts
    * avgShortPath - mean BFS distance from up to 617 randomly drawn nodes
      of the largest WCC
    """
    n = graph.GetNodes()
    m = graph.GetEdges()
    maxSCCsize = snap.GetMxSccSz(graph)
    maxWCCsize = snap.GetMxWccSz(graph)
    avgDegree = (m * float(2)) / n

    # estimate power law exponent
    degs = []
    degCounts = []
    DegToCntV = snap.TIntPrV()
    snap.GetDegCnt(graph, DegToCntV)
    for item in DegToCntV:
        degs.append(item.GetVal1())
        degCounts.append(item.GetVal2())
    xMin = min(degs) - 0.5
    logSum = sum([np.log(deg / xMin) * count
                  for deg, count in zip(degs, degCounts)])
    # The estimator is normalised by the node count. Use `n` directly:
    # the old code rebound `m` to the node count here, which then fed the
    # wrong edge count into the random graph below.
    alphaMLLE = 1 + (n / logSum)

    # erdos-renyi clustering coefficient (same n nodes, same m edges)
    graphER = snap.GenRndGnm(snap.PUNGraph, n, m)
    avgClustCoeffER = snap.GetClustCf(graphER, -1)

    graphWCC = snap.GetMxWcc(graph)
    avgClustCoeff = snap.GetClustCf(graphWCC, -1)

    # average shortest path
    numSamples = min(graphWCC.GetNodes(), 617)  # all nodes or sample size
    Rnd = snap.TRnd(42)
    Rnd.Randomize()
    shortPathList = []
    for i in xrange(numSamples):
        s = graphWCC.GetRndNId(Rnd)
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(graphWCC, s, NIdToDistH)
        for item in NIdToDistH:
            shortPathList.append(NIdToDistH[item])
    avgShortPath = np.mean(shortPathList)

    return avgClustCoeff, maxSCCsize, maxWCCsize, avgDegree, alphaMLLE, avgClustCoeffER, avgShortPath
def q2_3_aux(name): G = load_graph(name) SCC = snap.GetMxScc(G).GetNodes() wcc = snap.GetMxWcc(G).GetNodes() inexplosionVect = emIn if name == "email" else epIn outexplosionVect = emOut if name == "email" else epOut ineexpl = inexplosionVect[-1] outeexpl = outexplosionVect[-1] IN = ineexpl - SCC OUT = outeexpl - SCC DISCONNECTED = G.GetNodes()-wcc TENDRILS_AND_TUBES = wcc - IN - OUT - SCC print name,"DISCONNECTED:",DISCONNECTED,"IN:",IN,"OUT:",OUT,"SCC:",SCC,"TENDRILS + TUBES:",TENDRILS_AND_TUBES return
def q3():
    """Print component, PageRank and HITS statistics for the Java network.

    Loads "stackoverflow-Java.txt" as a directed graph and prints the number
    of weakly connected components, the edge and node counts of the largest
    one, and the top three (score, node id) pairs by PageRank, hub score and
    authority score.
    """
    G = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(G, components)
    print("Number of WCC: ", components.Len())

    MxComp = snap.GetMxWcc(G)
    # Values are listed in the order the label announces them (edges, then
    # nodes); they used to be printed the other way round.
    print("Number of edges and nodes in MxWCC: ",
          MxComp.GetEdges(), ' ', MxComp.GetNodes())

    def _top_three(table):
        # Highest three (score, node id) pairs of a snap hash table.
        pairs = [(table[node_id], node_id) for node_id in table]
        return sorted(pairs, reverse=True)[:3]

    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print("IDs of top 3 PageRank scores: ", _top_three(PRankH))

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print("IDs of top 3 hubs by HITS scores: ", _top_three(NIdHubH))
    print("IDs of top 3 authorities by HITS scores: ", _top_three(NIdAuthH))
def graph_cleaning(file_path): Graph, H = load_graph(file_path) Graph = snap.GetMxWcc(Graph) snap.DelSelfEdges(Graph) nodes_set = set() for NI in Graph.Nodes(): nodes_set.add(NI.GetId()) with open(file_path, 'r') as f: raw_list = f.read().split('\n') edges_list = [edge_str.split() for edge_str in raw_list] with open(file_path, 'w') as f: print '-----clear' with open(file_path, 'a') as f: for edge in edges_list: if len(edge) == 0: continue if H.GetKeyId(edge[0]) not in nodes_set: continue edge_cleaned = list() for node in edge: if H.GetKeyId(node) in nodes_set: edge_cleaned.append(node) f.write(' '.join(edge_cleaned) + '\n')
def stackoverflow(): g = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1) components = snap.TCnComV() snap.GetWccs(g, components) print "Num connected comp = ", components.Len() mxwcc = snap.GetMxWcc(g) print "Num edges in largest = ", mxwcc.GetEdges() print "Num nodes in largest = ", mxwcc.GetNodes() rank = snap.TIntFltH() snap.GetPageRank(g, rank) rank.SortByDat(False) count = 0 for node in rank: if count >= 3: break count += 1 print "largest page rank score nodes = ", node, " (score = ", rank[node] hubs = snap.TIntFltH() auths = snap.TIntFltH() snap.GetHits(g, hubs, auths) hubs.SortByDat(False) count = 0 for node in hubs: if count >= 3: break count += 1 print "largest hub score nodes = ", node, " (score = ", hubs[node] auths.SortByDat(False) count = 0 for node in auths: if count >= 3: break count += 1 print "largest auth score nodes = ", node, " (score = ", auths[node]
def partThree():
    """Print component, PageRank and HITS statistics for the Java network.

    Output: (1) number of weakly connected components, (2) edge and node
    counts of the largest one, (3) the three nodes with the highest PageRank
    score, one ``node id, score`` pair per line, (4) the top three
    (score, node id) pairs by hub score and then by authority score.
    """
    data_dir_StackOverFlow = './data/stackoverflow-Java.txt'
    sofG = snap.LoadEdgeList(snap.PNGraph, data_dir_StackOverFlow, 0, 1, '\t')

    Components = snap.TCnComV()
    snap.GetWccs(sofG, Components)
    print('1. The number of weakly connected components in the network.: '+str(Components.Len()))

    MxWcc = snap.GetMxWcc(sofG)
    num_node = MxWcc.GetNodes()
    num_deg = MxWcc.GetEdges()
    print('2. The number of edges is {} and the number of nodes is {}'.format(num_deg, num_node))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(sofG, PRankH)
    print('3. ')
    # Rank by score first: iterating the table directly yields entries in
    # storage order, which is what used to be printed as the "top" three.
    top_ranked = sorted(PRankH, key=lambda node_id: PRankH[node_id],
                        reverse=True)[:3]
    for item in top_ranked:
        print(item, PRankH[item])

    print('4. ')
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(sofG, NIdHubH, NIdAuthH)

    hub_pairs = [(NIdHubH[item], item) for item in NIdHubH]
    print(sorted(hub_pairs, reverse=True)[:3])

    auth_pairs = [(NIdAuthH[item], item) for item in NIdAuthH]
    print(sorted(auth_pairs, reverse=True)[:3])
def analyze_graph(G): WCC = snap.GetMxWcc(G) SCC = snap.GetMxScc(G) id = SCC.GetRndNId() out_tree = snap.GetBfsTree(G, id, True, False) in_tree = snap.GetBfsTree(G, id, False, True) G_size = G.GetNodes() SCC_size = SCC.GetNodes() WCC_size = WCC.GetNodes() DISCONNECTED_size = G_size - WCC_size in_size = in_tree.GetNodes() - SCC_size out_size = out_tree.GetNodes() - SCC_size Tendril_size = G_size - SCC_size - DISCONNECTED_size - in_size - out_size print 'Total Graph Size: %d' % G_size print 'SCC Size: %d' % SCC_size print 'WCC Size: %d' % WCC_size print 'IN Size: %d' % in_size print 'OUT Size: %d' % out_size print 'DISCONNECTED Size: %d' % DISCONNECTED_size print 'Tendril tube size (remaining): %d' % Tendril_size print()
def per_graph(graph, name): mxWcc = snap.GetMxWcc(graph) mxScc = snap.GetMxScc(graph) print '' print 'Size analysis on {}'.format(name) print 'Disconnected size = {}'.format(graph.GetNodes() - mxWcc.GetNodes()) print 'SCC size = {}'.format(mxScc.GetNodes()) trials = 200 avg_reached_out = 0 avg_reached_in = 0 for _ in range(trials): nodeId = mxScc.GetRndNId() avg_reached_out += snap.GetBfsTree(graph, nodeId, True, False).GetNodes() avg_reached_in += snap.GetBfsTree(graph, nodeId, False, True).GetNodes() scc_out = float(avg_reached_out) / trials scc_in = float(avg_reached_in) / trials out_sz = scc_out - mxScc.GetNodes() in_sz = scc_in - mxScc.GetNodes() print 'OUT size = {}'.format(out_sz) print 'IN size = {}'.format(in_sz) print 'Tendrils/Tubes size = {}'.format(mxWcc.GetNodes() - mxScc.GetNodes() - out_sz - in_sz)
#Load the stack overflow grap G1 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1) #1. Get the list of all weakly connected components Components = snap.TCnComV() snap.GetWccs(G1, Components) wccCount = 0 for Cc in Components: wccCount = wccCount + 1 print "1. Number of Weakly Connected Components: ", wccCount #2. Get The number of edges and the number # of nodes in the largest weakly connected component maxWcc = snap.GetMxWcc(G1) EdgeCount = 0 NodeCount = 0 for E in maxWcc.Edges(): EdgeCount = EdgeCount + 1 for N in maxWcc.Nodes(): NodeCount = NodeCount + 1 print "2. Number of edges and nodes in largest wcc" print "EdgeCount : ", EdgeCount print "NodeCount : ", NodeCount #3 Get The top 3 most central nodes in the network by PagePank scores PRankH = snap.TIntFltH() snap.GetPageRank(G1, PRankH)