def pageRank_components(g):
    """Write the top PageRank score of each weakly connected component of ``g``.

    The weakly connected components of ``g`` are extracted as subgraphs and a
    report is written to ``component_pr.txt`` in the working directory: a
    header line with the component count, then one line per component with
    ``<node count> <highest PageRank score>``.

    Args:
        g: directed snap graph (passed to ``GetSubGraph_PNGraph``).
    """
    print('executing pagerank components ---- getting components for page rank')
    Components = snap.TCnComV()
    snap.GetWccs(g, Components)

    # The context manager guarantees the report is flushed and closed even if
    # a snap call raises part-way through.
    with open('component_pr.txt', 'w') as f:
        cgraphs = []
        for com in Components:
            v = snap.TIntV()
            for ni in com:
                v.Add(ni)
            cgraphs.append(snap.GetSubGraph_PNGraph(g, v))
        print('components retrived for pagerank')

        f.write('Total components:' + str(len(cgraphs)) + '\n')
        for graph in cgraphs:
            # NOTE(review): components with exactly two nodes are skipped;
            # confirm this is intended (rather than e.g. "<= 2").
            if graph.GetNodes() == 2:
                continue
            sprank = snap.TIntFltH()
            snap.GetPageRank_PNGraph(graph, sprank)
            # Descending sort puts the best-ranked node first.
            sprank.SortByDat(False)
            top_score = sprank[sprank.BegI().GetKey()]
            f.write(str(graph.GetNodes()) + ' ' + str(top_score) + '\n')
    print('finished writing pagerank components values')
def gen_G(D, Pi_minus, Pi_exo, V_exo, theta2, N):
    """Build the pairwise-stable network on N nodes.

    Args:
        D, Pi_minus, Pi_exo: outputs of gen_D().
        V_exo: 'exogenous' part of joint surplus (output of gen_V_exo).
        theta2: transitivity parameter (theta[2]).
        N: number of nodes.

    Returns:
        Undirected snap graph holding the edges produced by
        gen_G_subgraph() for every multi-node component of D, plus every
        edge of Pi_exo.
    """
    # Start from an edgeless graph on N nodes.
    network = snap.GenRndGnm(snap.PUNGraph, N, 0)

    # Weakly connected components of D.
    d_components = snap.TCnComV()
    snap.GetWccs(D, d_components)

    member_ids = snap.TIntV()  # reused node-id buffer
    for component in d_components:
        if component.Len() <= 1:
            continue  # singleton components contribute nothing here
        member_ids.Clr()
        for node_id in component:
            member_ids.Add(node_id)
        sub_net = gen_G_subgraph(member_ids, D, Pi_minus, Pi_exo, V_exo,
                                 theta2)
        for link in sub_net.Edges():
            network.AddEdge(link.GetSrcNId(), link.GetDstNId())

    # Add the robust links.
    for link in Pi_exo.Edges():
        network.AddEdge(link.GetSrcNId(), link.GetDstNId())

    return network
def quick_properties(graph, name, dic_path):
    """Print a quick structural overview of the graph ``name``.

    Reports node/edge counts, self/directed/undirected/reciprocated edge
    counts, zero-degree node counts, maximum in- and out-degree, weakly
    connected component statistics, and the three best nodes by PageRank,
    hub score and authority score (translated to player names).

    Args:
        graph: snap graph to summarise.
        name: label used in the printed header.
        dic_path: path of a pickled dict ``{player: node id}``.

    Raises:
        ValueError: if a top-ranked node id is not a value of the dict.
        IndexError: if fewer than three nodes are ranked.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # .format() must be applied to the string, not to print()'s return value.
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))

    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    # GetDeg() is in-degree + out-degree on directed graphs; report the
    # direction-specific degree that the label promises.
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetInDeg(), node_out.GetOutDeg()))
    print("###")

    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))

    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)

    # NOTE: pickle.load can execute arbitrary code; only pass a trusted file.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    # Build the reverse-lookup lists once instead of once per printed name.
    players = list(mydict.keys())
    node_ids = list(mydict.values())

    def top3_names(ranked_ids):
        """Return the player names of the first three ids in ``ranked_ids``."""
        return [players[node_ids.index(ranked_ids[k])] for k in range(3)]

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *top3_names(sorted_prankH)))
    print("Top 3 hubs: {}, {}, {}".format(*top3_names(sorted_NIdHubH)))
    print("Top 3 authorities: {}, {}, {}".format(
        *top3_names(sorted_NIdAuthH)))
def computeWeaklyConnectedComponents(graph, outFile):
    """Export the weakly connected components of ``graph`` to ``outFile``.

    Each node id is written on its own line; a blank line terminates every
    component.

    Args:
        graph: snap graph to analyse.
        outFile: path of the text file to (over)write.
    """
    logger.info("Computing Weakly Connected Components")
    # The original never closed the handle; ``with`` guarantees the data is
    # flushed and the descriptor released, even when a snap call raises.
    with open(outFile, 'w') as fw_cc:
        Components = snap.TCnComV()
        snap.GetWccs(graph, Components)
        for CnCom in Components:
            for item in CnCom:
                fw_cc.write(str(item) + "\n")
            fw_cc.write("\n")
    logger.info("Weakly Connected Components Computed!")
    logger.info("Weakly Connected Components Exported to " + outFile)
def processNetwork(Graph, id_to_groups):
    """Dump structural features of ``Graph`` to a fixed report file.

    The report (``../../data/fastinf_graph_noweights_features.txt``) lists the
    graph size, the largest weakly and strongly connected components (sizes,
    node ids and their ``id_to_groups`` names), every WCC/SCC with at least
    10 nodes, and clustering coefficient, triad count and modularity.

    Args:
        Graph: snap graph to describe.
        id_to_groups: mapping from node id to a printable group name.
    """

    def write_subgraph(out, sub):
        # Sizes, then the node ids, then the matching group names.
        out.write('Edges: %f ' % sub.GetEdges())
        out.write('Nodes: %f \n' % sub.GetNodes())
        out.write('Node List: ')
        for member in sub.Nodes():
            out.write('%d, ' % member.GetId())
        out.write('\n')
        for member in sub.Nodes():
            out.write('%s, ' % id_to_groups[member.GetId()])

    def write_large_components(out, entry_fmt, comps):
        # Only components with 10 or more nodes are listed; the index is the
        # component's position in the full vector.
        for idx, comp in enumerate(comps):
            if comp.Len() >= 10:
                out.write(entry_fmt % idx)
                for member_id in comp:
                    out.write('%d, ' % member_id)

    with open("../../data/fastinf_graph_noweights_features.txt",
              "w+") as out:
        out.write("RELATED GROUPS GRAPH:\n")
        out.write('Edges: %d\n' % Graph.GetEdges())
        out.write('Nodes: %d\n\n' % Graph.GetNodes())

        largest_wcc = snap.GetMxWcc(Graph)
        out.write("MAX WCC:\n")
        write_subgraph(out, largest_wcc)

        out.write("\n\nALL WCCs:")
        weak = snap.TCnComV()
        snap.GetWccs(Graph, weak)
        write_large_components(out, '\nWcc%d: ', weak)

        largest_scc = snap.GetMxScc(Graph)
        out.write("\n\nMAX SCC:\n")
        write_subgraph(out, largest_scc)

        out.write("\n\nALL SCCs:")
        strong = snap.TCnComV()
        snap.GetSccs(Graph, strong)
        write_large_components(out, '\nScc%d: ', strong)

        out.write('\n\nCLUSTERING AND COMMUNITIES:\n')
        out.write('Clustering coefficient: %f\n' %
                  snap.GetClustCf(Graph, -1))
        out.write('Num Triads: %d\n' % snap.GetTriads(Graph, -1))
        all_ids = snap.TIntV()
        for node in Graph.Nodes():
            all_ids.Add(node.GetId())
        out.write('Modularity: %f' % snap.GetModularity(Graph, all_ids))
def wccs(self, returnNodes=True):
    """Return the weakly connected components, largest first.

    Each component is converted with SnapUtil.rawComponentToNodeSet; when
    returnNodes is false only the IDs are requested (fetching the nodes
    themselves adds overhead).
    """
    raw_components = snap.TCnComV()
    snap.GetWccs(self.rawGraph, raw_components)
    node_sets = [
        SnapUtil.rawComponentToNodeSet(component, self, returnNodes)
        for component in raw_components
    ]
    return sorted(node_sets, key=len, reverse=True)
def is_uniquely_connected(graph):
    """Tell whether ``graph`` has exactly one non-trivial connected component.

    The strongly connected components are examined first; only if they do not
    contain exactly one component with more than one node are the weakly
    connected components examined in the same way.

    Args:
        graph: snap graph to inspect.

    Returns:
        bool: True when exactly one component (strong, else weak) has more
        than one node.
    """

    def is_unique(components):
        # Exactly one component with more than a single node.
        return sum(1 for comp in components if comp.Len() > 1) == 1

    # First identify if there are strongly connected components in the graph
    s_components = snap.TCnComV()
    snap.GetSccs(graph, s_components)
    unique = is_unique(s_components)

    # A unique strongly connected component already settles the question;
    # otherwise repeat the search on the weak components.  The original
    # tested the helper function itself (``not is_unique``), which is always
    # False, so this fallback could never run.
    if not unique:
        w_components = snap.TCnComV()
        snap.GetWccs(graph, w_components)
        unique = is_unique(w_components)
    return unique
def main(path="/Users/qingyuan/CS224W/stackoverflow-Java.txt"):
    """Print WCC, PageRank and HITS statistics for an edge-list network.

    Args:
        path: edge-list file to load (columns 0 and 1 are source and
            destination). Defaults to the location the script originally
            hard-coded.
    """

    def print_top3(scores):
        # Sort descending by score, then print the first three entries.
        scores.SortByDat(False)
        for count, item in enumerate(scores, 1):
            print(item, scores[item])
            if count == 3:
                break

    network = snap.LoadEdgeList(snap.PNEANet, path, 0, 1)

    Components = snap.TCnComV()
    snap.GetWccs(network, Components)
    print("The number of weakly connected components is %d" %
          Components.Len())

    MxWcc = snap.GetMxWcc(network)
    # The message names edges first, so edges must be the first argument
    # (the original passed nodes, edges).
    print(
        "The number of edges is %d and the number of nodes is %d in the largest weakly connected component."
        % (MxWcc.GetEdges(), MxWcc.GetNodes()))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(network, PRankH)
    print(
        "IDs of the top 3 most central nodes in the network by PagePank scores. "
    )
    print_top3(PRankH)

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(network, NIdHubH, NIdAuthH)
    print("IDs of the top 3 hubs in the network by HITS scores. ")
    print_top3(NIdHubH)
    print("IDs of top 3 authorities in the network by HITS scores. ")
    print_top3(NIdAuthH)
def q3():
    """Print WCC, PageRank and HITS statistics for ``stackoverflow-Java.txt``.

    The edge list is read from the working directory as a directed graph.
    Top-3 results are printed as lists of ``(score, node id)`` tuples.
    """

    def top3(scores):
        # Highest score first; ties are ordered by node id, as tuples sort.
        return sorted(((scores[node_id], node_id) for node_id in scores),
                      reverse=True)[:3]

    G = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(G, components)
    print("Number of WCC: ", components.Len())

    MxComp = snap.GetMxWcc(G)
    # Use the graph's own counters instead of iterating every node/edge, and
    # print edges before nodes as the label states (the original printed
    # them the other way round).
    print("Number of edges and nodes in MxWCC: ", MxComp.GetEdges(), ' ',
          MxComp.GetNodes())

    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print("IDs of top 3 PageRank scores: ", top3(PRankH))

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print("IDs of top 3 hubs by HITS scores: ", top3(NIdHubH))
    print("IDs of top 3 authorities by HITS scores: ", top3(NIdAuthH))
def stackoverflow():
    """Print WCC, PageRank and HITS statistics for ``stackoverflow-Java.txt``.

    The printed text is identical to what the original Python 2 print
    statements produced (items separated by a single space), but every
    ``print`` is a single-argument call, so the function also runs on
    Python 3.
    """

    def print_top3(label, scores):
        # Sort descending by score and print the first three entries.
        scores.SortByDat(False)
        for position, node in enumerate(scores):
            if position >= 3:
                break
            print("{}  {}  (score =  {}".format(label, node, scores[node]))

    g = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(g, components)
    print("Num connected comp =  {}".format(components.Len()))

    mxwcc = snap.GetMxWcc(g)
    print("Num edges in largest =  {}".format(mxwcc.GetEdges()))
    print("Num nodes in largest =  {}".format(mxwcc.GetNodes()))

    rank = snap.TIntFltH()
    snap.GetPageRank(g, rank)
    print_top3("largest page rank score nodes = ", rank)

    hubs = snap.TIntFltH()
    auths = snap.TIntFltH()
    snap.GetHits(g, hubs, auths)
    print_top3("largest hub score nodes = ", hubs)
    print_top3("largest auth score nodes = ", auths)
def partThree():
    """Print WCC, PageRank and HITS statistics for the StackOverflow graph.

    Reads ``./data/stackoverflow-Java.txt`` (tab-separated edge list) and
    prints: (1) the number of weakly connected components, (2) the size of
    the largest one, (3) the three nodes with the highest PageRank, and
    (4) the three best hubs and authorities as ``(score, node id)`` tuples.
    """

    def top3(scores):
        # Highest score first, as (score, node id) tuples.
        return sorted(((scores[node_id], node_id) for node_id in scores),
                      reverse=True)[:3]

    data_dir_StackOverFlow = './data/stackoverflow-Java.txt'
    sofG = snap.LoadEdgeList(snap.PNGraph, data_dir_StackOverFlow, 0, 1,
                             '\t')

    Components = snap.TCnComV()
    snap.GetWccs(sofG, Components)
    print('1. The number of weakly connected components in the network.: ' +
          str(Components.Len()))

    MxWcc = snap.GetMxWcc(sofG)
    num_node = MxWcc.GetNodes()
    num_edge = MxWcc.GetEdges()
    print('2. The number of edges is {} and the number of nodes is {}'.format(
        num_edge, num_node))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(sofG, PRankH)
    print('3. ')
    # The original printed the first three hash entries in storage order;
    # rank by score so the three printed nodes are the highest-scoring ones.
    for score, node_id in top3(PRankH):
        print(node_id, score)

    print('4. ')
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(sofG, NIdHubH, NIdAuthH)
    print(top3(NIdHubH))
    print(top3(NIdAuthH))
# get the weakly connected component counts WccSzCnt = snap.TIntPr64V() snap.GetWccSzCnt(G, WccSzCnt) #print (WccSzCnt[0],WccSzCnt[0].Val1,WccSzCnt[0].Val2) for i in range(0, WccSzCnt.Len()): print("WccSzCnt[%d] = (%d, %d)" % (i, WccSzCnt[i].Val1.Val, WccSzCnt[i].Val2.Val)) # return nodes in the same weakly connected component as node 1 CnCom = snap.TInt64V() snap.GetNodeWcc(G, 1, CnCom) print("CnCom.Len() = %d" % (CnCom.Len())) # get nodes in weakly connected components WCnComV = snap.TCnComV() snap.GetWccs(G, WCnComV) for i in range(0, WCnComV.Len()): print("WCnComV[%d].Len() = %d" % (i, WCnComV[i].Len())) for j in range(0, WCnComV[i].Len()): print("WCnComV[%d][%d] = %d" % (i, j, WCnComV[i][j])) # get the size of the maximum weakly connected component MxWccSz = snap.GetMxWccSz(G) print("MxWccSz = %.5f" % (MxWccSz)) # get the graph with the largest weakly connected component GMx = snap.GetMxWcc(G) print("GMx: GetNodes() = %d, GetEdges() = %d" % (GMx.GetNodes(), GMx.GetEdges())) # get strongly connected components
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    """Summarise connected-component statistics over a set of ego networks.

    Every file in ``dataset_dir`` is loaded as an edge list. For each graph
    the weakly connected components are computed and three per-graph values
    are collected: the number of components, the mean component size, and the
    mean component size normalised by the graph's node count (all zero for a
    graph without edges). The aggregated statistics are written to
    ``<output_dir><net>_connected_comp.json`` and ``..._connected_comp.txt``
    and echoed to stdout. Nothing is computed if the JSON file already
    exists.

    Args:
        dataset_dir: directory prefix of the edge-list files (concatenated
            directly with each file name).
        output_dir: prefix for the output files.
        net: network name used in output file names and messages.
        IsDir: ``True`` loads directed graphs; anything else undirected.
        weight: unused by this function.
    """
    rule = "\n######################################################################\n"
    base_path = str(output_dir) + str(net)
    json_path = base_path + "_connected_comp.json"

    print(rule)
    if os.path.isfile(json_path):
        print("Arquivo já existe: " + json_path)
    else:
        print("Componentes conectados - " + str(dataset_dir))
        cc = []  # mean component size, one entry per ego network
        cc_normal = []  # same, normalised by the graph's node count
        n_cc = []  # number of components, one entry per ego network
        i = 0
        for fname in os.listdir(dataset_dir):
            i += 1
            print(base_path + "/" + str(fname) +
                  " - Calculando propriedades para o ego " + str(i) + ": " +
                  str(fname))
            # Load from a text file; a separator argument (e.g. '\t') may be
            # required for some datasets.
            graph_type = snap.PNGraph if IsDir is True else snap.PUNGraph
            G = snap.LoadEdgeList(graph_type, dataset_dir + fname, 0, 1)

            n_nodes = G.GetNodes()
            n_edges = G.GetEdges()

            if n_edges == 0:
                cc.append(0)
                cc_normal.append(0)
                n_cc.append(0)
                print("Nenhuma aresta encontrada para a rede-ego " + str(i) +
                      " - (" + str(fname))
            else:
                Components = snap.TCnComV()
                snap.GetWccs(G, Components)
                _cc = [CnCom.Len() for CnCom in Components]
                _cc_normal = [
                    float(size) / float(n_nodes) for size in _cc
                ]
                _n_cc = len(_cc)

                result = calc.calcular(_cc)
                cc.append(result['media'])
                result_normal = calc.calcular(_cc_normal)
                cc_normal.append(result_normal['media'])
                n_cc.append(_n_cc)

                print("Número de componentes conectados para o ego " +
                      str(i) + " (" + str(fname) + "): " + str(_n_cc))
                print(
                    "Média do tamanho dos componentes conectados para o ego "
                    + str(i) + " (" + str(fname) + "): " +
                    str(result['media']))
                print(
                    "Média (normalizada) do tamanho dos componentes conectados para o ego "
                    + str(i) + " (" + str(fname) + "): " +
                    str(result_normal['media']))
                # Blank separator line. The original bare ``print`` only
                # emits it on Python 2; on Python 3 it is a silent no-op.
                print("")

        N_CC = calc.calcular_full(n_cc)
        CC = calc.calcular_full(cc)
        CC_NORMAL = calc.calcular_full(cc_normal)

        overview = {}
        overview['Len_ConnectedComponents'] = CC
        overview['Len_ConnectedComponents_Normal'] = CC_NORMAL
        overview['N_ConnectedComponents'] = N_CC
        with open(json_path, 'w') as f:
            f.write(json.dumps(overview))

        # The same five lines go to the text report and to stdout.
        report = [
            rule,
            "Number_Connected_Comp: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
            % (N_CC['media'], N_CC['variancia'], N_CC['desvio_padrao']),
            "Length_Connected_Comp: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
            % (CC['media'], CC['variancia'], CC['desvio_padrao']),
            "Length_Connected_Comp_Normalized: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
            % (CC_NORMAL['media'], CC_NORMAL['variancia'],
               CC_NORMAL['desvio_padrao']),
            rule,
        ]
        with open(base_path + "_connected_comp.txt", 'w') as f:
            for line in report:
                f.write(line)
        for line in report:
            print(line)
def connectedComponent(clusterCommands, Graph, conn, cur):
    """Compute the weakly connected components of Graph and pass them on.

    The component vector is handed to createTable() together with
    clusterCommands, conn and cur, unchanged.
    """
    weak_components = snap.TCnComV()
    snap.GetWccs(Graph, weak_components)
    createTable(clusterCommands, weak_components, conn, cur)
print(line) #Below addresses 1.f,g g_outdeg = snap.TFltPr64V() g_indeg = snap.TFltPr64V() snap.GetOutDegCnt(g, g_outdeg) snap.GetInDegCnt(g, g_indeg) #g_outdeg is a vector of pairs of floats. Each pair is addressed like (Val1,Val2) outdeg_gt_10 = list(filter(lambda x: x.GetVal2() > 10, g_outdeg)) indeg_gt_10 = list(filter(lambda x: x.GetVal2() > 10, g_indeg)) print(f'Nodes with outdegree > 10: {len(outdeg_gt_10)}') print(f'Nodes with indegree > 10: {len(indeg_gt_10)}') #Problem 2 so = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt") #2.1 so_wcc = snap.TCnComV() snap.GetWccs(so, so_wcc) print(f'# of connected components: {len(so_wcc)}') #2.2 so_mx_wcc = snap.GetMxWcc(so) snap.PrintInfo(so_mx_wcc, "Largest connected component of StackOverflow-Java") #2.3 so_pr = snap.TIntFlt64H() snap.GetPageRank(so, so_pr) so_pr.SortByDat(False) #Ascending=False #The code below might be a naive way to do it #Mb try GetKeyV() for hashtable types then just grab top 3 elements so_pr_ordered = [] so_pr_iter = so_pr.BegI() while not so_pr_iter.IsEnd(): so_pr_ordered.append(( so_pr_iter.GetKey(),
def quick_properties(graph, name, dic_path):
    """Print and return quick properties of the graph ``name``.

    Args:
        graph: snap graph to summarise.
        name: label used in the printed header.
        dic_path: path of a pickled dict ``{players: id}``; it is passed to
            ``name_from_index`` to translate node ids into player names.

    Returns:
        dict: the collected values keyed by the labels ``"a. Nodes"`` ...
        ``"o. Three top authorities"``.
    """
    results = {}
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    n_self_edges = snap.CntSelfEdges(graph)
    n_directed_edges = snap.CntUniqDirEdges(graph)
    n_undirected_edges = snap.CntUniqUndirEdges(graph)
    n_reciprocated_edges = snap.CntUniqBiDirEdges(graph)
    n_zero_out_nodes = snap.CntOutDegNodes(graph, 0)
    n_zero_in_nodes = snap.CntInDegNodes(graph, 0)
    # GetDeg() is in-degree + out-degree on directed graphs; use the
    # direction-specific degree that the labels promise.
    max_node_in = graph.GetNI(snap.GetMxInDegNId(graph)).GetInDeg()
    max_node_out = graph.GetNI(snap.GetMxOutDegNId(graph)).GetOutDeg()
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)

    results["a. Nodes"] = n_nodes
    results["b. Edges"] = n_edges
    results["c. Self-edges"] = n_self_edges
    results["d. Directed edges"] = n_directed_edges
    results["e. Undirected edges"] = n_undirected_edges
    results["f. Reciprocated edges"] = n_reciprocated_edges
    results["g. 0 out-degree nodes"] = n_zero_out_nodes
    results["h. 0 in-degree nodes"] = n_zero_in_nodes
    results["i. Maximum node out-degree"] = max_node_out
    results["j. Maximum node in-degree"] = max_node_in
    results["k. Weakly connected components"] = components.Len()
    results["l. Nodes, edges of largest WCC"] = (max_wcc.GetNodes(),
                                                 max_wcc.GetEdges())

    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(n_self_edges))
    print("{} Directed edges, {} Undirected edges".format(
        n_directed_edges, n_undirected_edges))
    print("{} Reciprocated edges".format(n_reciprocated_edges))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        n_zero_out_nodes, n_zero_in_nodes))
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        max_node_in, max_node_out))
    print("###")
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))

    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)

    # NOTE: pickle.load can execute arbitrary code; only pass a trusted file.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    def top3_names(ranked_ids):
        """Resolve the first three ranked ids to names, once each."""
        return tuple(
            name_from_index(ranked_ids, k, mydict) for k in range(3))

    top_pagerank = top3_names(sorted_prankH)
    top_hubs = top3_names(sorted_NIdHubH)
    top_authorities = top3_names(sorted_NIdAuthH)

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *top_pagerank))
    print("Top 3 hubs: {}, {}, {}".format(*top_hubs))
    print("Top 3 authorities: {}, {}, {}".format(*top_authorities))

    results["m. Three top PageRank"] = top_pagerank
    results["n. Three top hubs"] = top_hubs
    results["o. Three top authorities"] = top_authorities
    return results
import snap

# Script: structural statistics of the StackOverflow-Java graph, loaded as a
# directed graph from a tab-separated edge list.
data = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1, '\t')

# The number of weakly connected components in the network.
Components = snap.TCnComV()
snap.GetWccs(data, Components)
print("Number of Weakly Connected Components:", Components.Len())

# The number of edges and the number of nodes in the largest weakly connected component
MxWcc = snap.GetMxWcc(data)
print("Number of MxWcc Edges:", MxWcc.GetEdges())
print("Number of MxWcc Nodes:", MxWcc.GetNodes())

# IDs of the top 3 most central nodes in the network by PageRank scores
PRankH = snap.TIntFlt64H()
snap.GetPageRank(data, PRankH)
PRankH.SortByDat(False)  # descending, so the best-ranked node comes first
i = 0
itr = PRankH.BegI()
print("The top 3 most central nodes in the network by PagePank scores:")
# NOTE(review): the loop does not check for the end of the table; it
# assumes the graph has at least three nodes.
while i < 3:
    print("Node:", itr.GetKey())
    itr.Next()
    i += 1
print("")

# IDs of the top 3 hubs and top 3 authorities in the network by HITS scores.
NIdHubH = snap.TIntFlt64H()
NIdAuthH = snap.TIntFlt64H()
def get_weakly_connected_components_number(graph: snap.PNGraph) -> int:
    """Return the number of weakly connected components of ``graph``.

    The original computed the component vector but fell off the end of the
    function, so callers always received ``None``.
    """
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    return components.Len()
""" Created on Fri Jan 3 14:32:01 2020 @author: qiuwenjie """
''' cs224w homework 0 Q3 '''
import snap as sp
import numpy as np

# Load the StackOverflow-Java edge list as a directed graph.
G = sp.LoadEdgeList(sp.PNGraph, "stackoverflow-Java.txt", 0, 1)

# Q3.1: number of weakly connected components
Components = sp.TCnComV()
sp.GetWccs(G, Components)
WccsNum = len(Components)
print("Q3.1 The number of weakly connected components in the network: ", WccsNum)

# Q3.2: size of the largest weakly connected component
MxWcc = sp.GetMxWcc(G)
MxWccNodeNum = MxWcc.GetNodes()
MxWccEdgesNum = MxWcc.GetEdges()
print("Q3.2 %d edges and %d nodes in the largest weakly connected component."\
      %(MxWccEdgesNum,MxWccNodeNum))

# PageRank scores; keys and values are collected into the two lists below
# (the loop body lies outside this fragment).
PRankH = sp.TIntFltH()
sp.GetPageRank(G, PRankH)
PRankHKey = []
PRankHVal = []
for item in PRankH:
import snap #Load the stack overflow grap G1 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1) #1. Get the list of all weakly connected components Components = snap.TCnComV() snap.GetWccs(G1, Components) wccCount = 0 for Cc in Components: wccCount = wccCount + 1 print "1. Number of Weakly Connected Components: ", wccCount #2. Get The number of edges and the number # of nodes in the largest weakly connected component maxWcc = snap.GetMxWcc(G1) EdgeCount = 0 NodeCount = 0 for E in maxWcc.Edges(): EdgeCount = EdgeCount + 1 for N in maxWcc.Nodes(): NodeCount = NodeCount + 1 print "2. Number of edges and nodes in largest wcc" print "EdgeCount : ", EdgeCount print "NodeCount : ", NodeCount #3 Get The top 3 most central nodes in the network by PagePank scores PRankH = snap.TIntFltH()
# Rakshith Singh Assignment 1 - Stackoverflow Analysis # https://snap.stanford.edu/snappy/doc/reference/index-ref.html # https://stackoverflow.com/questions/10152131/how-do-i-index-the-3-highest-values-in-a-list import snap stackoverflow_graph = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt") #Question 1 - The number of weakly connected components in the network. Components = snap.TCnComV() snap.GetWccs(stackoverflow_graph, Components) print("Number of weakly connected components is ", len(Components)) #Question 2 - The number of edges and the number of nodes in the largest weakly connected component. MxWcc = snap.GetMxWcc(stackoverflow_graph) print("The number of nodes in the largest Wcc is ", MxWcc.GetNodes()) print("The number of edges in the largest Wcc is ", MxWcc.GetEdges()) #Question 3 - The top 3 most central nodes in the network by PagePank scores. PRankH = snap.TIntFltH() snap.GetPageRank(stackoverflow_graph, PRankH) first = 3496478 second = 3600470 third = 3399766 #for item in PRankH: # print(item, PRankH[item]) node_list = [] prank_list = [] for item in PRankH: node_list.append(item) prank_list.append(PRankH[item]) results = sorted(zip(prank_list, node_list), reverse=True)[:3] print("Top 3 Central nodes and their Page Ranks are")
plt.ylabel(r'$\log{Count}$') # plt.xlim(right=numpy.amax(x), left=0.0) # plt.ylim(top=max(numpy.amax(y), numpy.amax(y_reg)), bottom=0.0) handle_datpnt = plt.scatter(x, y, label='datapoint') handle_reg, = plt.plot(x, y_reg, color='red', label='least-square regression') plt.legend([handle_datpnt, handle_reg], ['datapoint', 'least-square regression']) plt.show() # Section 3 # print('*' * 10 + ' Section III ' + '*' * 10) # sof for stackoverflow sof_g = snap.LoadEdgeListStr(snap.PNGraph, "stackoverflow-Java.txt", 0, 1) # connected components vector wcc_vec = snap.TCnComV() snap.GetWccs(sof_g, wcc_vec) print("The stackoverflow-Java graph has " + str(len(wcc_vec)) + " weakly connected components.") # largest_wcc = snap.GetMxWcc(sof_g) print('The largest weekly connected component of stackoverflow graph has ' + str(largest_wcc.GetNodes()) + ' nodes and ' + str(snap.CntUniqDirEdges(sof_g)) + ' edges.') rankscoremap = snap.TIntFltH() snap.GetPageRank(sof_g, rankscoremap) maplen = len(rankscoremap) rankscoremap.SortByDat() cnt = 0 print("If you use PageRank score, then") for item in rankscoremap:
import snap
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression

DATA_PATH = './stackoverflow-Java.txt'

if __name__ == '__main__':
    # Build Java Stackoverflow Graph
    G1 = snap.LoadEdgeList(snap.PNGraph, DATA_PATH, 0, 1)

    # Get the number of Wcc. Ask the vector for its length instead of
    # copying every component into a Python list just to count them.
    Wccs = snap.TCnComV()
    snap.GetWccs(G1, Wccs)
    print("Weakly connected components: {:d}".format(Wccs.Len()))

    # Get the number of nodes and edges on the largest Wcc, using the
    # graph's own counters rather than materialising all nodes and edges.
    MxWcc = snap.GetMxWcc(G1)
    num_nodes = MxWcc.GetNodes()
    num_edges = MxWcc.GetEdges()
    print("The largest WCC has {:d} nodes; {:d} edges".format(
        num_nodes, num_edges))

    # Get top 3 PageRank IDs
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G1, PRankH)
# Load the network
SOURCE_FILE = './data/stackoverflow-Java.txt'
SOGraph = snap.LoadEdgeList(snap.PNGraph, SOURCE_FILE, 0, 1)
# Sanity checks against the expected size of the dataset.
assert 146874 == SOGraph.GetNodes()
assert 333606 == SOGraph.GetEdges()


def sortTIntFltH(mapping, desc=True):
    """Return ``(node id, score)`` pairs of ``mapping`` sorted by score.

    Args:
        mapping: iterable of node ids that also supports ``mapping[id]``.
        desc: sort from highest to lowest score when true (the default).
    """
    return sorted([(nodeId, mapping[nodeId]) for nodeId in mapping],
                  reverse=desc,
                  key=lambda x: x[1])


# 3.1
components = snap.TCnComV()
snap.GetWccs(SOGraph, components)
# Adjacent string literals are concatenated as-is, so the separating space
# has to be written explicitly (the original printed "networkis").
print("The number of weakly connected components in the SO network "
      "is %s." % (len(components)))

# 3.2
maxWeaklyConnectedComponent = snap.GetMxWcc(SOGraph)
print("The largest weakly connected component in the SO network "
      "has %s nodes and %s edges." %
      (maxWeaklyConnectedComponent.GetNodes(),
       maxWeaklyConnectedComponent.GetEdges()))

# 3.3
TOPN = 3
SOPageRanks = snap.TIntFltH()
snap.GetPageRank(SOGraph, SOPageRanks, 0.85, 1e-4, 1000)
sortedSOPageRanks = sortTIntFltH(SOPageRanks)
''' This script is for hw0-q2 '''
import snap

# Python 2 script (print statements): statistics of the wiki-Vote graph.
g = snap.LoadEdgeList(snap.PNGraph, 'wiki-Vote.txt', 0, 1)

# Number of weakly connected components.
wccgv = snap.TCnComV()
snap.GetWccs(g, wccgv)
print wccgv.Len()

# Node and edge counts of the largest weakly connected component.
wccg = snap.GetMxWcc(g)
print wccg.GetNodes()
print wccg.GetEdges()

# PageRank, sorted descending; print the first three (id, score) pairs.
PRankH = snap.TIntFltH()
snap.GetPageRank(g, PRankH)
PRankH.SortByDat(False)
a = 0
for item in PRankH:
    if (a < 3):
        print item, PRankH[item]
        a = a + 1
    else:
        break
print "\n"

# HITS hub and authority scores.
HubH = snap.TIntFltH()
AutH = snap.TIntFltH()
snap.GetHits(g, HubH, AutH)
import snap # import os # Graph = snap.GenRndGnm(snap.PNGraph, 100, 1000) # print os.system("pwd") Graph = snap.LoadEdgeList(snap.PNGraph, "../bitcoin_computed/txedgeunique.txt", 0, 1) G_Nodes = Graph.GetNodes() G_Edges = Graph.GetEdges() print "Graph: Nodes %d, Edges %d" % (G_Nodes, G_Edges) SCComponents = snap.TCnComV() WCComponents = snap.TCnComV() snap.GetSccs(Graph, SCComponents) snap.GetWccs(Graph, WCComponents) MaxWCCNodes = WCComponents[0] MaxSCCNodes = SCComponents[0] # print type(MaxSccNodes) print MaxSCCNodes.Len() print MaxWCCNodes.Len() # Iterate over each edge and check for In, Out SCCHashmap = snap.TIntH() for node in MaxSCCNodes: SCCHashmap.AddKey(node) InOutHashmap = snap.TIntH() for node in MaxWCCNodes:
def Get_Connected_Components(G):
    """Print the size of every weakly connected component of G."""
    wcc_list = snap.TCnComV()
    snap.GetWccs(G, wcc_list)
    for component in wcc_list:
        component_size = component.Len()
        print("Size of component: %d" % component_size)
# P3 of hw1 import snap # 1) The number of weakly connected components g1 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1) Components = snap.TCnComV() snap.GetWccs(g1, Components) cnt = 0 for ele in Components: #print "Size of Component: %d" % (ele.Len()) cnt += 1 print "1) number of weakly connected components: %d" % (cnt) # 2) The number of edges and the number of nodes g2 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1) MxWcc = snap.GetMxWcc(g2) cnt_edge = 0 cnt_node = 0 for ele in MxWcc.Edges(): cnt_edge += 1 for ele in MxWcc.Nodes(): cnt_node += 1 print "2) Edges = %d, Node = %d" % (cnt_edge, cnt_node) # 3) g3 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1) PRankH = snap.TIntFltH() snap.GetPageRank(g3, PRankH) max = 0
def CombineWords(allwords, orderby=['T', 'C']):
    """Group overlapping OCR word candidates per page and order the groups.

    For every page, all (ocrid, box, word) candidates become nodes of an
    undirected graph with an edge between two candidates whose boxes overlap
    on the same page. Each weakly connected component is one "word" (a list
    of alternative candidates). Words are sorted by a per-component order
    vector holding, for each OCR id in ``orderby``, the smallest original
    position of that OCR's candidates in the component (NO_ORDER_MARK when
    the OCR has none).

    This is Python 2 code (``print >>``, ``list.sort(cmp=...)``).

    Args:
        allwords: mapping ocrid -> pageid -> list of (box, word) pairs.
        orderby: OCR ids whose original order drives the sorting. The
            default list is shared between calls; it is only read here.

    Returns:
        (page_words, wccsizes): ``page_words`` maps pageid to the ordered
        list of candidate groups; ``wccsizes`` maps component size to the
        number of components of that size over all pages.
    """
    page_cands = {}  # pageid : [OCRwords]
    page_words = {}  # pageid : [[cand1, cand2], [cand1, cand2, cand3]..]
    page_cands_order = {
    }  # ocrid : pageid : cand_sub : ori_ocr_order (inverted index, only for "orderby" ocrs)
    for ocrid in allwords:
        index = allwords[ocrid]
        page_cands_order[ocrid] = {}
        for pageid in index:
            if pageid not in page_cands:
                page_cands[pageid] = []
            if pageid not in page_cands_order[ocrid]:
                page_cands_order[ocrid][pageid] = {}
            for ocr_sub in range(0, len(index[pageid])):
                pair = index[pageid][ocr_sub]
                box = pair[0]
                word = pair[1]
                # New: handle C bad outputs (box at the page origin);
                # they are reported on stderr and dropped.
                if box.GetLeft() == 0 and box.GetUp() == 0 and ocrid == 'C':
                    print >> sys.stderr, box.GetPrinted(), word.GetContent()
                    continue  # Ignore this candidate
                page_cands[pageid].append(
                    (ocrid, ) + pair)  # (ocrid, box, word)
                # if ocrid == orderby: # slow... TODO
                # Remember the candidate's original position in its OCR
                # output, keyed by its index in the page candidate list.
                cand_sub = len(page_cands[pageid]) - 1
                page_cands_order[ocrid][pageid][cand_sub] = ocr_sub
    # print page_cands_order
    # raw_input()
    wccsizes = {}
    for pageid in page_cands:
        nodes = page_cands[pageid]  # [(ocrid, box, word), ...]
        graph = snap.PUNGraph.New()  # Undirected graph
        for i in range(0, len(nodes)):
            _ret = graph.AddNode(i)
        # Add overlapping edges in undirected graph
        for i in range(0, len(nodes)):
            for j in range(i + 1, len(nodes)):
                b1 = nodes[i][1]
                b2 = nodes[j][1]
                if b1.IsOverlapSamePage(b2):
                    graph.AddEdge(i, j)
        # TODO After aggregate by WCCs, we need a proper order.
        # Now picking order by first word in Tesseract Order!
        # If not appearing in Tesseract, pick order as Cuneiform order. (how?) or neglect???
        words = [
        ]  # candidates separated: [ [order, [cand1, cand2]], [order, [cand1]], ...]
        wccs = snap.TCnComV()
        snap.GetWccs(graph, wccs)
        order_comp = {}  # not used below
        for comp in wccs:
            # Histogram of component sizes across all pages.
            wccsz = comp.Len()
            if wccsz not in wccsizes:
                wccsizes[wccsz] = 0
            wccsizes[wccsz] += 1
            # print "Size of component: %d" % comp.Len()
            # for arr in [[nodes[nid][0], nodes[nid][1].GetPrinted(), nodes[nid][2].GetContent()] for nid in comp]:
            #     print ' ', '\t'.join(arr)
            this_cc = [nodes[nid] for nid in comp]  # not used below
            # One slot per "orderby" OCR: smallest original position seen.
            this_order = [NO_ORDER_MARK] * len(orderby)
            for nid in comp:
                ocrid = nodes[nid][0]
                ocrorder = page_cands_order[ocrid][pageid][nid]
                if ocrid in orderby:
                    i = orderby.index(ocrid)
                    if this_order[
                            i] == NO_ORDER_MARK or this_order[i] > ocrorder:
                        this_order[i] = ocrorder
                # else:
                #     words.append( (this_order, [nodes[nid] for nid in comp]) )
            words.append((this_order, [nodes[nid] for nid in comp]))
        # raw_input()
        # Order the words by their order vectors using order_compare.
        words.sort(cmp=order_compare, key=lambda word: word[0])
        # print 'Top 100 Orders:', [w[0] for w in words][:100]
        # print 'None Orders:'
        # for l in [
        #     [w[0]
        #     # +[w[1][0][2].GetContent()]
        #     for w in words[i - 3 : i + 3]]
        #     for i in range(3, len(words) - 3)
        #     if None in words[i][0]
        #     ]:
        #     print l
        # Test case:
        # python alignment/Align.py /Users/Robin/Documents/repos/deepdive_ocr/deepdive_danny/app/ocr/data/html-labels-accurate/data-labeled/ocroutput/JOURNAL_28971.pdf.task/
        # [7, 6], [8, 7], [9, 9], [12, 13], [13, 10], [14, None], [15, None], [16, None], [17, None], [18, None]
        # [19, None], ... , [94, None]
        # [95, None], [96, None], [97, None], [98, None], [None, 14], [None, 15], [99, 16], [102, 17], [104, 18], [105, 19]
        test_orders = [w[0] for w in words]
        words = [w[1] for w in words]  # remove order
        # Debugging loop: the assignments below have no further effect in
        # the active code (the prints that used them are commented out).
        for i in range(len(words)):
            word = words[i]
            order = test_orders[i]
            # if order[0] == None:
            #     print 'Order:', order
            #     print len(word), 'candidates...'
            #     for can in word:
            #         print '\t'.join([can[0], can[1].GetPrinted(), can[2].GetContent()])
            # # if len(word) != 2 or len(word) >= 2 and word[0][2].GetContent() != word[1][2].GetContent():
            # #     raw_input()
            # if len(word) == 1:
            #     raw_input()
        page_words[pageid] = words
    return page_words, wccsizes