def get_hits_venues():
    """Run HITS on the paper->venue citation graph and dump hub and
    authority scores to two text files, one "name,score" pair per line,
    keyed by the original string node names.
    """
    mapping = snap.TStrIntSH()
    t0 = time()
    # Edge list columns: 0 = paper id, 1 = venue id (string names);
    # `mapping` records the string-name <-> integer-node-id pairs.
    G0 = snap.LoadEdgeListStr(snap.PNGraph, "paperid_venueid_ref.txt", 0, 1,
                              mapping)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G0, NIdHubH, NIdAuthH, 1000)  # up to 1000 iterations
    print("HITS time:", round(time() - t0, 3), "s")
    # FIX: use `with` so the output files are closed even if a write fails
    # (the originals were opened at the top and leaked on any exception).
    with open("paper_venues_hits_hub.txt", 'w') as file_output_1:
        for item in NIdHubH:
            file_output_1.write(
                str(mapping.GetKey(item)) + "," + str(NIdHubH[item]) + '\n')
    with open("paper_venues_hits_auth.txt", 'w') as file_output_2:
        for item in NIdAuthH:
            file_output_2.write(
                str(mapping.GetKey(item)) + "," + str(NIdAuthH[item]) + '\n')
    # convert input string to node id
    # NodeId = mapping.GetKeyId("814DF491")
    # convert node id to input string
    # NodeName = mapping.GetKey(NodeId)
    print("finish hits!")
def quick_properties(graph, name, dic_path):
    """Print quick properties of the graph ``name``.

    dic_path is the path of a pickled dict {player_name: node_id}; it is
    used to translate the top PageRank / HITS node ids back to names.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # BUG FIX: `.format()` was previously called on the return value of
    # print() (i.e. on None), which raised AttributeError.
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetDeg(), node_out.GetDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    # BUG FIX: these two lines were Python-2 print statements (a syntax
    # error under Python 3, which the rest of this function targets).
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH, key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH, key=lambda key: NIdAuthH[key],
                             reverse=True)
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    def _name(sorted_ids, rank):
        # Reverse lookup: the player name whose id equals sorted_ids[rank].
        return list(mydict.keys())[list(mydict.values()).index(
            sorted_ids[rank])]

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        _name(sorted_prankH, 0), _name(sorted_prankH, 1),
        _name(sorted_prankH, 2)))
    print("Top 3 hubs: {}, {}, {}".format(
        _name(sorted_NIdHubH, 0), _name(sorted_NIdHubH, 1),
        _name(sorted_NIdHubH, 2)))
    print("Top 3 authorities: {}, {}, {}".format(
        _name(sorted_NIdAuthH, 0), _name(sorted_NIdAuthH, 1),
        _name(sorted_NIdAuthH, 2)))
def getNodeAttributes(self, UGraph):
    """Build a per-node feature matrix.

    Column order: PageRank, hub score, authority score, betweenness,
    closeness, farness, eccentricity. Returns a numpy array with one row
    per node.
    """
    features = [[] for _ in range(UGraph.GetNodes())]

    # PageRank
    PRankH = snap.TIntFltH()
    snap.GetPageRank(UGraph, PRankH)
    for row, key in enumerate(PRankH):
        features[row].append(PRankH[key])

    # HITS: hub and authority scores in a single pass
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(UGraph, NIdHubH, NIdAuthH)
    for row, key in enumerate(NIdHubH):
        features[row].append(NIdHubH[key])
        features[row].append(NIdAuthH[key])

    # Betweenness centrality
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
    for row, key in enumerate(Nodes):
        features[row].append(Nodes[key])

    # Closeness centrality
    for row, NI in enumerate(UGraph.Nodes()):
        features[row].append(snap.GetClosenessCentr(UGraph, NI.GetId()))

    # Farness centrality
    for row, NI in enumerate(UGraph.Nodes()):
        features[row].append(snap.GetFarnessCentr(UGraph, NI.GetId()))

    # Node eccentricity (directed variant)
    for row, NI in enumerate(UGraph.Nodes()):
        features[row].append(snap.GetNodeEcc(UGraph, NI.GetId(), True))

    return np.array(features)
def hits(graph_filename):
    """Load a directed graph from a string edge list and run HITS on it.

    Returns (name_id_map, id_hub_map, id_auth_map): the string<->int node
    mapping plus the hub-score and authority-score hash tables.
    """
    # Build the graph; name_id_map records string-name <-> int-id pairs.
    name_id_map = snap.TStrIntSH()
    graph = snap.LoadEdgeListStr(snap.PNGraph, graph_filename, 0, 1,
                                 name_id_map)

    # Run HITS for up to 1000 iterations.
    id_hub_map, id_auth_map = snap.TIntFltH(), snap.TIntFltH()
    snap.GetHits(graph, id_hub_map, id_auth_map, 1000)

    return name_id_map, id_hub_map, id_auth_map
def calc_HubAndAuthorityScores(Graph, node_to_g):
    """Compute HITS hub and authority scores, keyed by protein.

    node_to_g maps snap node ids to protein identifiers.
    Returns the pair (prot_to_hub, prot_to_authority).
    """
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(Graph, NIdHubH, NIdAuthH)
    # Re-key each score table from node id to protein identifier.
    prot_to_hub = {node_to_g[node]: NIdHubH[node] for node in NIdHubH}
    prot_to_authority = {node_to_g[node]: NIdAuthH[node] for node in NIdAuthH}
    return (prot_to_hub, prot_to_authority)
def compute_hub_authority_score(self, graph):
    """Run HITS on `graph` and return its (hub, authority) score tables.

    Both returned values are snap.TIntFltH hash tables keyed by node id;
    the values are the hub / authority scores output by the HITS algorithm.
    """
    hub_scores, authority_scores = snap.TIntFltH(), snap.TIntFltH()
    snap.GetHits(graph, hub_scores, authority_scores)
    return hub_scores, authority_scores
def HITS(G):
    """Run HITS on G and print each successive running-maximum hub score,
    then each successive running-maximum authority score.

    FIX: converted the Python-2 print statements to print() calls and
    renamed the local that shadowed the builtin `max`.
    """
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    # NOTE(review): the original's print indentation was ambiguous; this
    # assumes it was inside the `if`, printing each new running maximum.
    best = 0.0
    for item in NIdHubH:
        if NIdHubH[item] > best:
            best = NIdHubH[item]
            print(item, NIdHubH[item])
    best = 0.0
    for item in NIdAuthH:
        if NIdAuthH[item] > best:
            best = NIdAuthH[item]
            print(item, NIdAuthH[item])
def main():
    """Print WCC, PageRank and HITS summaries for the StackOverflow-Java
    network."""
    network = snap.LoadEdgeList(
        snap.PNEANet, "/Users/qingyuan/CS224W/stackoverflow-Java.txt", 0, 1)

    Components = snap.TCnComV()
    snap.GetWccs(network, Components)
    print("The number of weakly connected components is %d" %
          Components.Len())

    MxWcc = snap.GetMxWcc(network)
    # BUG FIX: the format string says "edges ... nodes" but the arguments
    # were (GetNodes(), GetEdges()) — i.e. swapped. Pass them in the
    # stated order.
    print(
        "The number of edges is %d and the number of nodes is %d in the largest weakly connected component."
        % (MxWcc.GetEdges(), MxWcc.GetNodes()))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(network, PRankH)
    PRankH.SortByDat(False)  # sort descending by score
    num = 0
    print(
        "IDs of the top 3 most central nodes in the network by PagePank scores. "
    )
    for item in PRankH:
        print(item, PRankH[item])
        num += 1
        if num == 3:
            num = 0
            break

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(network, NIdHubH, NIdAuthH)
    NIdHubH.SortByDat(False)
    print("IDs of the top 3 hubs in the network by HITS scores. ")
    for item in NIdHubH:
        print(item, NIdHubH[item])
        num += 1
        if num == 3:
            num = 0
            break

    NIdAuthH.SortByDat(False)
    print("IDs of top 3 authorities in the network by HITS scores. ")
    for item in NIdAuthH:
        print(item, NIdAuthH[item])
        num += 1
        if num == 3:
            num = 0
            break
def compute_HITS(Graph):
    '''
    :param Graph: the graph to compute HITS on
    :return: 1. list of tuple (hub_score, node_id) in descending order
             2. list of tuple (authority_score, node_id) in descending order
    '''
    NIdHubH = snap.TIntFltH()   # placeholder for hub
    NIdAuthH = snap.TIntFltH()  # placeholder for authority
    snap.GetHits(Graph, NIdHubH, NIdAuthH)
    # (score, id) tuples sort by score first; node ids are unique, so
    # reverse-sorting is equivalent to the original sorted(...)[::-1].
    hubs = sorted(((NIdHubH[n], n) for n in NIdHubH), reverse=True)
    auths = sorted(((NIdAuthH[n], n) for n in NIdAuthH), reverse=True)
    return hubs, auths
def hubs_and_authorities_score(self, MaxIter=20):
    '''
    Compute the HITS hub and authority score of every node in the graph.

    :param MaxIter: Maximum number of iterations.
    :return: tuple (hubs, authorities), each a list of (node_id, score).
    '''
    snap = self.snap
    hub_table = snap.TIntFlt64H()
    auth_table = snap.TIntFlt64H()
    snap.GetHits(self.graph, hub_table, auth_table, MaxIter)
    hubs = [(node_id, hub_table[node_id]) for node_id in hub_table]
    authorities = [(node_id, auth_table[node_id]) for node_id in auth_table]
    return hubs, authorities
def get_hits(net, label, outpath):
    """Compute HITS (hub/authority) centrality for a directed graph.

    Writes four files under `outpath`: the full hub ranking, the top-100
    hubs, the full authority ranking, and the top-100 authorities.

    :param net: snap directed graph
    :param label: prefix used in the output file names
    :param outpath: output directory prefix
    :return: (hub, authority) — lists of (node_id, score), descending
    """
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(net, NIdHubH, NIdAuthH)

    def _rank_and_dump(score_table, kind):
        # One-line purpose: sort a score table descending and write the
        # full and top-100 rankings to disk.
        scores = {node: score_table[node] for node in score_table}
        ranked = sorted(scores.items(), key=operator.itemgetter(1),
                        reverse=True)
        # FIX: `with` closes the files even on error (originals leaked).
        with open(outpath + label + '-' + kind, 'w') as full_file:
            for node_id, value in ranked:
                full_file.write(str(node_id) + '\t' + str(value) + '\n')
        with open(outpath + label + '-' + kind + '-top100', 'w') as top_file:
            # BUG FIX: the original used range(100) unconditionally and
            # raised IndexError on graphs with fewer than 100 nodes.
            for node_id, value in ranked[:100]:
                top_file.write(str(node_id) + '\t' + str(value) + '\n')
        return ranked

    hub = _rank_and_dump(NIdHubH, 'hub')
    authority = _rank_and_dump(NIdAuthH, 'authority')
    return hub, authority
def q3():
    """WCC, PageRank and HITS summary of the StackOverflow-Java graph."""
    G = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(G, components)
    print("Number of WCC: ", components.Len())

    MxComp = snap.GetMxWcc(G)
    # FIX: the label said "edges and nodes" while the values printed were
    # (nodes, edges); also use GetNodes()/GetEdges() instead of counting
    # the iterators by hand.
    print("Number of nodes and edges in MxWCC: ", MxComp.GetNodes(), ' ',
          MxComp.GetEdges())

    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    # (score, id) pairs sorted descending; ids are unique so ties resolve
    # identically to the original implementation.
    res = sorted(((PRankH[nid], nid) for nid in PRankH), reverse=True)[:3]
    print("IDs of top 3 PageRank scores: ", res)

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    res = sorted(((NIdHubH[nid], nid) for nid in NIdHubH), reverse=True)[:3]
    print("IDs of top 3 hubs by HITS scores: ", res)
    res = sorted(((NIdAuthH[nid], nid) for nid in NIdAuthH), reverse=True)[:3]
    print("IDs of top 3 authorities by HITS scores: ", res)
def computeAuthHubScore(G, NodeAttributes):
    """Attach raw and min-max-normalized HITS hub/authority scores to each
    node's entry in NodeAttributes, and return NodeAttributes.
    """
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)

    HubNodes = []
    for nodeId in NIdHubH:
        HubNodes.append((nodeId, NIdHubH[nodeId]))
        NodeAttributes[nodeId]['HubScore'] = NIdHubH[nodeId]
    HubNodes.sort(key=lambda x: x[1], reverse=True)

    AuthNodes = []
    for nodeId in NIdAuthH:
        AuthNodes.append((nodeId, NIdAuthH[nodeId]))
        NodeAttributes[nodeId]['AuthScore'] = NIdAuthH[nodeId]
    AuthNodes.sort(key=lambda x: x[1], reverse=True)

    # Extremes used for min-max normalization (lists are sorted descending).
    minAuthNodes = AuthNodes[-1][1]
    maxAuthNodes = AuthNodes[0][1]
    minHubNodes = HubNodes[-1][1]
    maxHubNodes = HubNodes[0][1]

    # BUG FIX: guard against a zero range (all scores equal), which
    # previously raised ZeroDivisionError; every normalized score is then 0.
    hubRange = (maxHubNodes - minHubNodes) or 1.0
    authRange = (maxAuthNodes - minAuthNodes) or 1.0

    for (node, hubScore) in HubNodes:
        NodeAttributes[node]['normHubScore'] = \
            (hubScore - minHubNodes) / hubRange
    for (node, authScore) in AuthNodes:
        NodeAttributes[node]['normAuthScore'] = \
            (authScore - minAuthNodes) / authRange

    return NodeAttributes
def partThree():
    """Print WCC / PageRank / HITS summaries for the StackOverflow-Java
    graph."""
    data_dir_StackOverFlow = './data/stackoverflow-Java.txt'
    sofG = snap.LoadEdgeList(snap.PNGraph, data_dir_StackOverFlow, 0, 1, '\t')

    Components = snap.TCnComV()
    snap.GetWccs(sofG, Components)
    print('1. The number of weakly connected components in the network.: '
          + str(Components.Len()))

    MxWcc = snap.GetMxWcc(sofG)
    num_node = MxWcc.GetNodes()
    num_deg = MxWcc.GetEdges()
    print('2. The number of edges is {} and the number of nodes is {}'.format(
        num_deg, num_node))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(sofG, PRankH)
    print('3. ')
    # Print the first three (id, score) pairs in iteration order.
    for position, item in enumerate(PRankH, start=1):
        if position > 3:
            break
        print(item, PRankH[item])

    print('4. ')
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(sofG, NIdHubH, NIdAuthH)
    HubDict = {item: NIdHubH[item] for item in NIdHubH}
    print(list(sorted(zip(HubDict.values(), HubDict.keys()),
                      reverse=True))[:3])
    AuthDict = {item: NIdAuthH[item] for item in NIdAuthH}
    print(list(sorted(zip(AuthDict.values(), AuthDict.keys()),
                      reverse=True))[:3])
def stackoverflow():
    """Report WCC, PageRank, hub and authority summaries.

    FIX: the original used Python-2 `print` statements, a syntax error
    under Python 3; converted to print() calls producing the same
    space-separated output.
    """
    g = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(g, components)
    print("Num connected comp = ", components.Len())

    mxwcc = snap.GetMxWcc(g)
    print("Num edges in largest = ", mxwcc.GetEdges())
    print("Num nodes in largest = ", mxwcc.GetNodes())

    rank = snap.TIntFltH()
    snap.GetPageRank(g, rank)
    rank.SortByDat(False)  # descending by score
    count = 0
    for node in rank:
        if count >= 3:
            break
        count += 1
        print("largest page rank score nodes = ", node, " (score = ",
              rank[node])

    hubs = snap.TIntFltH()
    auths = snap.TIntFltH()
    snap.GetHits(g, hubs, auths)

    hubs.SortByDat(False)
    count = 0
    for node in hubs:
        if count >= 3:
            break
        count += 1
        print("largest hub score nodes = ", node, " (score = ", hubs[node])

    auths.SortByDat(False)
    count = 0
    for node in auths:
        if count >= 3:
            break
        count += 1
        print("largest auth score nodes = ", node, " (score = ", auths[node])
def model_Hits(G):
    """Compute HITS hub/authority scores for every node, persist them to a
    CSV file, and print the five rows with the highest hub score."""
    print('*********Computes the Hubs and Authorities score of every node in Graph********')
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    # Collect ids and scores in the tables' iteration order.
    hub_ids = [item for item in NIdHubH]
    hub_scores = [NIdHubH[item] for item in NIdHubH]
    auth_ids = [item for item in NIdAuthH]
    auth_scores = [NIdAuthH[item] for item in NIdAuthH]
    data = pd.DataFrame({'node': hub_ids,
                         'score_NIdHubH': hub_scores,
                         'node2': auth_ids,
                         'score_NIdAuthH': auth_scores})
    data = data.sort_values(by="score_NIdHubH", ascending=True)
    # Write the results to disk.
    data.to_csv('./data/picture/model_Hits.csv')
    print('model_Hits = ', data[-5:])
def quick_properties(graph, name, dic_path):
    """Compute and print quick properties of the graph ``name``.

    dic_path is the path of a pickled dict {player_name: node_id}.
    Returns a dict of labelled statistics (keys "a." through "o.").
    """
    results = {}
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    n_self_edges = snap.CntSelfEdges(graph)
    n_directed_edges, n_undirected_edges = snap.CntUniqDirEdges(
        graph), snap.CntUniqUndirEdges(graph)
    n_reciprocated_edges = snap.CntUniqBiDirEdges(graph)
    n_zero_out_nodes, n_zero_in_nodes = snap.CntOutDegNodes(
        graph, 0), snap.CntInDegNodes(graph, 0)
    max_node_in = graph.GetNI(snap.GetMxInDegNId(graph)).GetDeg()
    max_node_out = graph.GetNI(snap.GetMxOutDegNId(graph)).GetDeg()
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    results["a. Nodes"] = n_nodes
    results["b. Edges"] = n_edges
    results["c. Self-edges"] = n_self_edges
    results["d. Directed edges"] = n_directed_edges
    results["e. Undirected edges"] = n_undirected_edges
    results["f. Reciprocated edges"] = n_reciprocated_edges
    results["g. 0 out-degree nodes"] = n_zero_out_nodes
    results["h. 0 in-degree nodes"] = n_zero_in_nodes
    results["i. Maximum node out-degree"] = max_node_out
    results["j. Maximum node in-degree"] = max_node_in
    results["k. Weakly connected components"] = components.Len()
    results["l. Nodes, edges of largest WCC"] = (max_wcc.GetNodes(),
                                                 max_wcc.GetEdges())
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(n_self_edges))
    print("{} Directed edges, {} Undirected edges".format(
        n_directed_edges, n_undirected_edges))
    print("{} Reciprocated edges".format(n_reciprocated_edges))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        n_zero_out_nodes, n_zero_in_nodes))
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        max_node_in, max_node_out))
    print("###")
    # BUG FIX: the next two lines were Python-2 print statements (a syntax
    # error under Python 3, which the rest of this function targets).
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH, key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH, key=lambda key: NIdAuthH[key],
                             reverse=True)
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)
    # Translate the three best node ids of each ranking back to names.
    top_pagerank = tuple(
        name_from_index(sorted_prankH, i, mydict) for i in range(3))
    top_hubs = tuple(
        name_from_index(sorted_NIdHubH, i, mydict) for i in range(3))
    top_authorities = tuple(
        name_from_index(sorted_NIdAuthH, i, mydict) for i in range(3))
    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *top_pagerank))
    print("Top 3 hubs: {}, {}, {}".format(*top_hubs))
    print("Top 3 authorities: {}, {}, {}".format(*top_authorities))
    results["m. Three top PageRank"] = top_pagerank
    results["n. Three top hubs"] = top_hubs
    results["o. Three top authorities"] = top_authorities
    return results
def manage_graphs(out_degree, nodes, max_minutes):
    """Generate a small-world graph, report its degree/HITS extremes, then
    run the CNM and GN community detectors under a shared time budget.

    Returns (graph, cnm_exec_time, gn_exec_time, exceed) where `exceed`
    flags that the time/memory budget was blown.
    """
    rnd = snap.TRnd(1, 0)
    graph = snap.GenSmallWorld(nodes, out_degree, 0.7, rnd)
    print(40 * "#")
    print(f"Starting Graph for #{nodes} Nodes.")

    # Save the graph in order to reload it after manipulation
    output_filename = f"temporary_graphs/{nodes}_ws_graph.graph"
    f_out = snap.TFOut(output_filename)
    graph.Save(f_out)
    f_out.Flush()

    # Highest rank Node
    max_degree_node = graph.GetNI(snap.GetMxDegNId(graph))
    print(f"Highest Degree Node ID#{max_degree_node.GetId()}"
          f" with Degree={max_degree_node.GetDeg()}")

    # Gets Hubs and Authorities of all the nodes
    hubs_per_node = snap.TIntFltH()
    auths_per_node = snap.TIntFltH()
    snap.GetHits(graph, hubs_per_node, auths_per_node)
    max_hub_node = graph.GetNI(
        max(hubs_per_node, key=lambda h: hubs_per_node[h]))
    print(f"Highest Hub Score Node ID#{max_hub_node.GetId()}"
          f" with Score={hubs_per_node[max_hub_node.GetId()]}")
    max_authority_node = graph.GetNI(
        max(auths_per_node, key=lambda a: auths_per_node[a]))
    # BUG FIX: the authority score was previously read from hubs_per_node.
    print(f"Highest Authority Score Node ID#{max_authority_node.GetId()}"
          f" with Score={auths_per_node[max_authority_node.GetId()]}")

    exceed = False

    # CNM Community Detector, run on a worker thread with a timeout.
    cnm_community = snap.TCnComV()
    cnm_thread = threading.Thread(target=snap.CommunityCNM,
                                  args=(graph, cnm_community))
    cnm_start_time = time.time()
    try:
        cnm_thread.start()
        cnm_thread.join(max_minutes)
    except MemoryError:
        exceed = True
    finally:
        cnm_stop_time = time.time()
        cnm_community_exec_time = cnm_stop_time - cnm_start_time
        exceed |= max_minutes <= cnm_community_exec_time

    # GN Community Detector: only run with whatever budget remains.
    if max_minutes > cnm_community_exec_time and not exceed:
        gn_community = snap.TCnComV()
        gn_thread = threading.Thread(target=snap.CommunityGirvanNewman,
                                     args=(graph, gn_community))
        gn_start_time = time.time()
        try:
            gn_thread.start()
            gn_thread.join(max_minutes - cnm_community_exec_time)
        except MemoryError:
            exceed = True
        finally:
            gn_stop_time = time.time()
            gn_community_exec_time = gn_stop_time - gn_start_time
            exceed |= (gn_community_exec_time
                       >= max_minutes - cnm_community_exec_time)
    else:
        gn_community_exec_time = 0.00

    if not exceed:
        print(
            f"Execution Time for CNM Communities Detector is {round(cnm_community_exec_time, 4):.4f}"
        )
        print(
            f"Execution Time for GN Communities Detector is {round(gn_community_exec_time, 4):.4f}"
        )
    else:
        # BUG FIX: `nodes_num` was undefined (NameError); the parameter
        # is named `nodes`.
        print(
            f"Graph with Nodes#{nodes} exceeded the valid calculation limits."
        )
    print(40 * "#")

    # load graph in it's initial State
    f_in = snap.TFIn(output_filename)
    graph = snap.TUNGraph.Load(f_in)

    return graph, cnm_community_exec_time, gn_community_exec_time, exceed
print("The largest weakly connected component in the SO network" "has %s nodes and %s edges." % (maxWeaklyConnectedComponent.GetNodes(), maxWeaklyConnectedComponent.GetEdges())) # 3.3 TOPN = 3 SOPageRanks = snap.TIntFltH() snap.GetPageRank(SOGraph, SOPageRanks, 0.85, 1e-4, 1000) sortedSOPageRanks = sortTIntFltH(SOPageRanks) print("The node IDs of the top %s most central nodes in the network " "by PageRank scores are %s with scores %s respectively." % (TOPN, tuple(t[0] for t in sortedSOPageRanks[:TOPN]), tuple(t[1] for t in sortedSOPageRanks[:TOPN]))) # 3.4 TOPN = 3 hubsScores = snap.TIntFltH() authScores = snap.TIntFltH() snap.GetHits(SOGraph, hubsScores, authScores, 100) sortedHubScores = sortTIntFltH(hubsScores) sortedAuthScores = sortTIntFltH(authScores) print("The node IDs of the top %s hubs in the network by HITS scores " "are %s with scores %s respectively." % (TOPN, tuple(t[0] for t in sortedHubScores[:TOPN]), tuple(t[1] for t in sortedHubScores[:TOPN]))) print print("The node IDs of the top %s authorities in the network by HITS " "scores are %s with score %s respectively." % (TOPN, tuple(t[0] for t in sortedAuthScores[:TOPN]), tuple(t[1] for t in sortedAuthScores[:TOPN])))
# Join questions to their accepted answers: one row per (question, answer).
edges = questions.Join("t1.AcceptedAnswerId", posts, "PostId")
t.show("join", edges)

# Create haskell-specific Q&A graph
# >>> graph = posts.graph('Asker', 'Expert', directed = True)
# Edge direction: Asker -> Expert.
edges.SetSrcCol("t1_t2.Asker")
edges.SetDstCol("t1.Expert")
graph = snap.ToGraph(edges, snap.aaFirst)
t.show("graph", graph)

# Compute Authority score
# >>> hits = graph.hits('Authority', 'Hub')
# note: the code below creates a table (Node name, Authority score) - the
# hub score is not used
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
authority = snap.TTable.New("authority", HTAuth, "Expert",
                            AUTHORITY_ATTRIBUTE, context, snap.TBool(False))
t.show("authority score", authority)

# b) Compute comment scores
# Load comments
# >>> comments = ringo.load('comments.tsv')
# Schema: integer UserId and PostId columns, tab-separated input.
S = snap.Schema()
S.Add(snap.TStrTAttrPr("UserId", snap.atInt))
S.Add(snap.TStrTAttrPr("PostId", snap.atInt))
comments = snap.TTable.LoadSS("comments", S, commentsFile, context, '\t',
                              snap.TBool(False))
t.show("load", comments)
# Find Top-30 nodes of PageRank page_rank_scores = snap.TIntFltH() snap.GetPageRank(largest_graph, page_rank_scores) top_thirty_nodes_ids = sorted(page_rank_scores, key=lambda n: page_rank_scores[n], reverse=True)[:30] top_thirty_nodes_ids.sort() top_thirty_nodes_page_rank = [ page_rank_scores[node_id] for node_id in top_thirty_nodes_ids ] # Gets Hubs and Authorities of all the nodes hubs_per_node = snap.TIntFltH() auths_per_node = snap.TIntFltH() snap.GetHits(largest_graph, hubs_per_node, auths_per_node) top_thirty_hubs = [ hubs_per_node[node_id] for node_id in top_thirty_nodes_ids ] top_thirty_authorities = [ auths_per_node[node_id] for node_id in top_thirty_nodes_ids ] # # Find betweenness nodes_betweenness = snap.TIntFltH() edge_betweenness = snap.TIntPrFltH() betweenness_centrality = snap.GetBetweennessCentr( largest_graph, nodes_betweenness, edge_betweenness, 1.0) top_thirty_betweenness = [ nodes_betweenness[node_id] for node_id in top_thirty_nodes_ids ]
def get_HITS_scores(G, n):
    """Return the (hub, authority) HITS score pair for node `n` of `G`."""
    hub_table = snap.TIntFltH()
    auth_table = snap.TIntFltH()
    snap.GetHits(G, hub_table, auth_table)
    return hub_table[n], auth_table[n]
rankscoremap = snap.TIntFltH() snap.GetPageRank(sof_g, rankscoremap) maplen = len(rankscoremap) rankscoremap.SortByDat() cnt = 0 print("If you use PageRank score, then") for item in rankscoremap: if cnt >= maplen - 3: print("\tTop " + str(maplen - cnt) + " node is " + str(item) + " with score " + str(rankscoremap[item])) cnt = cnt + 1 hubscore = snap.TIntFltH() authscore = snap.TIntFltH() snap.GetHits(sof_g, hubscore, authscore) cnt = 0 hubscore.SortByDat() authscore.SortByDat() print("If you use HITS score, then") for item in authscore: if cnt >= maplen - 3: print("\tTop " + str(maplen - cnt) + " node is " + str(item) + " with authority " + str(authscore[item])) cnt = cnt + 1 print('\t' + '-' * 45) cnt = 0 for item in hubscore: if cnt >= maplen - 3:
def main():
    """Extract per-node heuristic features (in-degree, PageRank, clustering
    coefficient, HITS hub/authority scores, average neighbour degree and
    eccentricity) for every labelled node and save them to a CSV file.
    """
    parser = ArgumentParser("node_heu",
                            formatter_class=ArgumentDefaultsHelpFormatter,
                            conflict_handler='resolve')
    # Required arguments
    parser.add_argument("--network", type=str, required=True,
                        help='The path and name of the .mat file containing the adjacency matrix and node labels of the input network')
    parser.add_argument("--edgelist", type=str, required=True,
                        help='The path and name of the edgelist file with no weights containing the edgelist of the input network')
    parser.add_argument("--dataset", type=str, required=True,
                        help='The name of your dataset (used for output)')
    # Optional arguments
    parser.add_argument("--adj_matrix_name", default='network',
                        help='The name of the adjacency matrix inside the .mat file')
    parser.add_argument("--label_matrix_name", default='group',
                        help='The name of the labels matrix inside the .mat file')
    args = parser.parse_args()
    print(args)

    mat, A, graph, labels_matrix, labels_count, indices = load_graph(
        args.network, args.adj_matrix_name, args.label_matrix_name)
    s_time = time.time()

    # Load edgelist as undirected graph in SNAP
    G = snap.LoadEdgeList(snap.PUNGraph, args.edgelist)
    print("Loading graph in SNAP ... {}".format(str(args.edgelist)))

    # Load edgelist for networkx
    G_NETX = nx.read_edgelist(args.edgelist)
    print("Loading graph in NetworkX .... {}".format(str(args.edgelist)))

    # Get Average Neighbor Degree from NetworkX (only time NetworkX is used)
    AvgNeighDe = nx.average_neighbor_degree(G_NETX)

    # Calculate Page Rank
    p_time = time.time()
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print("Finished in Page rank in {}".format(str(time.time() - p_time)))

    # Calculate Hub and Authority Scores
    h_time = time.time()
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print("Finished in Hub and Auth Scores in {}".format(
        str(time.time() - h_time)))

    count = 0
    node_data = []
    fl_100 = time.time()
    print("Num of nodes: {}".format(len(PRankH)))
    print("Num of nodes with labels: {}".format(len(indices)))
    print("Collecting other features for each node ...")
    for n in G.Nodes():
        nid = n.GetId()
        if nid in indices:
            node_data.append((nid, n.GetInDeg(), PRankH[n.GetId()],
                              snap.GetNodeClustCf(G, nid),
                              NIdHubH[n.GetId()], NIdAuthH[n.GetId()],
                              AvgNeighDe[str(nid)],
                              snap.GetNodeEcc(G, nid)))
            count = count + 1
            if count % 1000 == 0:
                # Progress report every 1000 labelled nodes.
                print("Processed {} nodes".format(str(count)))
                print(time.time() - fl_100)
                fl_100 = time.time()

    # BUG FIX: the DataFrame was previously built and written to the very
    # same CSV file twice (copy-pasted block); do it once.
    out_path = args.network.replace(".mat", "") + "_node_heuristic_features.csv"
    nhdf = pd.DataFrame(node_data,
                        columns=('NodeId', 'Degree', 'PageRankScore',
                                 'NodeClustCf', 'HubScore', 'AuthScore',
                                 'AverageNeighborDegree', 'NodeEcc'))
    nhdf.to_csv(out_path, index=False)
    print("File saved at {}".format(out_path))
    print("Finished in {}".format(str(time.time() - s_time)))
# print(item, PRankH[item]) node_list = [] prank_list = [] for item in PRankH: node_list.append(item) prank_list.append(PRankH[item]) results = sorted(zip(prank_list, node_list), reverse=True)[:3] print("Top 3 Central nodes and their Page Ranks are") for result in results: a, b = result print(b, a) #Question 4 - The top 3 hubs and top 3 authorities in the network by HITS scores. NIdHubH = snap.TIntFltH() NIdAuthH = snap.TIntFltH() snap.GetHits(stackoverflow_graph, NIdHubH, NIdAuthH) tmp_lst1 = [] tmp_lst2 = [] for item in NIdHubH: tmp_lst1.append(item) tmp_lst2.append(NIdHubH[item]) hub_results = sorted(zip(tmp_lst2, tmp_lst1), reverse=True)[:3] tmp_lst3 = [] tmp_lst4 = [] for item in NIdAuthH: tmp_lst3.append(item) tmp_lst4.append(NIdAuthH[item]) auth_results = sorted(zip(tmp_lst4, tmp_lst3), reverse=True)[:3] print("Top 3 Hubs and their hit scores are")
def getAttribute(filename):
    """Load an undirected graph from `filename` and return a DataFrame with
    one row per node: degree, several centrality measures, PageRank, HITS
    hub/authority scores, eccentricity and eigenvector centrality.
    """
    UGraph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)
    UGraph.Dump()
    attributes = pd.DataFrame(
        np.zeros(shape=(UGraph.GetNodes(), 12)),
        columns=['Graph', 'Id', 'Degree', 'DegreeCentrality',
                 'NodeBetweennessCentrality', 'ClosenessCentrality',
                 'FarnessCentrality', 'PageRank', 'HubsScore',
                 'AuthoritiesScore', 'NodeEccentricity',
                 'EigenvectorCentrality'])
    attributes['Graph'] = [filename] * UGraph.GetNodes()

    # Out-degree per node (also supplies the node-id column).
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, OutDegV)
    attributes['Id'] = [pair.GetVal1() for pair in OutDegV]
    attributes['Degree'] = [pair.GetVal2() for pair in OutDegV]

    # Per-node degree/closeness/farness centrality and eccentricity.
    node_ids = [NI.GetId() for NI in UGraph.Nodes()]
    attributes['DegreeCentrality'] = [
        snap.GetDegreeCentr(UGraph, nid) for nid in node_ids]
    attributes['ClosenessCentrality'] = [
        snap.GetClosenessCentr(UGraph, nid) for nid in node_ids]
    attributes['FarnessCentrality'] = [
        snap.GetFarnessCentr(UGraph, nid) for nid in node_ids]
    attributes['NodeEccentricity'] = [
        snap.GetNodeEcc(UGraph, nid, False) for nid in node_ids]

    # Betweenness centrality (exact: node fraction 1.0).
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
    attributes['NodeBetweennessCentrality'] = [Nodes[node] for node in Nodes]

    # PageRank
    PRankH = snap.TIntFltH()
    snap.GetPageRank(UGraph, PRankH)
    attributes['PageRank'] = [PRankH[item] for item in PRankH]

    # HITS hub / authority scores
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(UGraph, NIdHubH, NIdAuthH)
    attributes['HubsScore'] = [NIdHubH[item] for item in NIdHubH]
    attributes['AuthoritiesScore'] = [NIdAuthH[item] for item in NIdAuthH]

    # Eigenvector centrality
    NIdEigenH = snap.TIntFltH()
    snap.GetEigenVectorCentr(UGraph, NIdEigenH)
    attributes['EigenvectorCentrality'] = [
        NIdEigenH[item] for item in NIdEigenH]

    return attributes
print("Number of MxWcc Nodes:", MxWcc.GetNodes()) # IDs of the top 3 most central nodes in the network by PagePank scores PRankH = snap.TIntFlt64H() snap.GetPageRank(data, PRankH) PRankH.SortByDat(False) i = 0 itr = PRankH.BegI() print("The top 3 most central nodes in the network by PagePank scores:") while i < 3: print("Node:", itr.GetKey()) itr.Next() i += 1 print("") # IDs of the top 3 hubs and top 3 authorities in the network by HITS scores. NIdHubH = snap.TIntFlt64H() NIdAuthH = snap.TIntFlt64H() snap.GetHits(data, NIdHubH, NIdAuthH) NIdHubH.SortByDat(False) i = 0 itr = NIdHubH.BegI() print("The top 3 hubs in the network by HITS score:") while i < 3: print("Node:", itr.GetKey()) itr.Next() i += 1
# NOTE(review): Python-2 print statements throughout this fragment; the
# chunk also appears to be cut off mid-computation at the end.
print max_index[1]
# Third pass: highest PageRank score excluding the two already found.
max = 0
for ele in PRankH:
    if PRankH[ele] > max and ele != max_index[0] and ele != max_index[1]:
        max = PRankH[ele]
        max_index[2] = ele
print "Top 3 most central nodes by PageRank Score", max_index[2]

# 4)
g4 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
hub = snap.TIntFltH()
aut = snap.TIntFltH()
snap.GetHits(g4, hub, aut)
# Hubs
max = 0
max_index_hub = [0, 0, 0]
max_index_aut = [0, 0, 0]
# First pass: node with the highest hub score.
for ele in hub:
    if hub[ele] > max:
        max = hub[ele]
        max_index_hub[0] = ele
# Second pass: highest hub score excluding the first winner.
max = 0
for ele in hub:
    if hub[ele] > max and ele != max_index_hub[0]:
        max = hub[ele]
# NOTE(review): Python-2 print statements in this fragment.
snap.GetPageRank(G1, PRankH)
count = 0
# sort the rank hash by data (score) in descending order
PRankH.SortByDat(False)
iter = PRankH.BegI()
print "3. The top 3 most central nodes in the network by PagePank scores"
while (count < 3):
    # NOTE(review): under Python 2 this prints a (key, score) tuple.
    print(iter.GetKey(), iter.GetDat())
    iter = iter.Next()
    count += 1

# 4 The top 3 hubs and top 3 authorities in the network by HITS scores
NIdHubH = snap.TIntFltH()
NIdAuthH = snap.TIntFltH()
snap.GetHits(G1, NIdHubH, NIdAuthH)
print "4. The top 3 hubs and top 3 authorities in the network by HITS scores"
count = 0
# sort in descending order
NIdHubH.SortByDat(False)
iter = NIdHubH.BegI()
while (count < 3):
    print 'hub :', iter.GetKey(), iter.GetDat()
    iter = iter.Next()
    count += 1
count = 0
# sort in descending order
NIdAuthH.SortByDat(False)
def compute_graph_statistics(graph_path, overwrite, compute_betweenness=False):
    """Compute and persist a battery of statistics and plots for a saved snap graph.

    Args:
        graph_path: path to a ``.graph`` binary dump of a snap.TNEANet whose
            nodes carry a string attribute "pkg".
        overwrite: when True, recompute/replot everything even if the output
            file or JSON entry already exists.
        compute_betweenness: opt-in flag for the expensive betweenness section.

    Results are cached incrementally: scalar results go into
    ``<graph_name>_statistics.json``, heavy hashtables are saved/loaded via
    snap.TFOut/TFIn data files, and plots are skipped when present.
    NOTE(review): calls os.chdir(), changing the process working directory
    as a side effect.
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)
    # rebuild the id => pkg dictionary from the per-node "pkg" attribute
    id_pkg_dict = {}
    for node in graph.Nodes():
        id_pkg_dict[node.GetId()] = graph.GetStrAttrDatN(node.GetId(), "pkg")
    directory = os.path.dirname(os.path.abspath(graph_path))
    json_path = os.path.join(directory, graph_name + "_statistics.json")
    # resume from a previous run when the JSON cache exists (key order preserved)
    if os.path.isfile(json_path):
        with open(json_path, "r") as f:
            statistics = json.load(f, object_pairs_hook=OrderedDict)
    else:
        statistics = OrderedDict()
    # snap.py doesn't support absolute paths for some operations. Let's cd to the directory
    os.chdir(directory)

    # general statistics
    output = os.path.join(directory, graph_name + "_main_statistics.txt")
    if not os.path.isfile(output) or overwrite:
        print("{0} Computing general statistics".format(datetime.datetime.now()))
        snap.PrintInfo(graph, "Play Store Graph -- main statistics", output, False)

    # info about the nodes with the max in degree
    if "max_in_degree" not in statistics or overwrite:
        print("{0} Computing max indegree".format(datetime.datetime.now()))
        max_in_deg_id = snap.GetMxInDegNId(graph)
        iterator = graph.GetNI(max_in_deg_id)
        max_in_deg = iterator.GetInDeg()
        max_in_deg_pkg = graph.GetStrAttrDatN(max_in_deg_id, "pkg")
        statistics["max_in_degree"] = max_in_deg
        statistics["max_in_degree_id"] = max_in_deg_id
        statistics["max_in_degree_pkg"] = max_in_deg_pkg

    # info about the nodes with the max out degree
    if "max_out_degree" not in statistics or overwrite:
        print("{0} Computing max outdegree".format(datetime.datetime.now()))
        max_out_deg_id = snap.GetMxOutDegNId(graph)
        iterator = graph.GetNI(max_out_deg_id)
        max_out_deg = iterator.GetOutDeg()
        max_out_deg_pkg = graph.GetStrAttrDatN(max_out_deg_id, "pkg")
        statistics["max_out_degree"] = max_out_deg
        statistics["max_out_degree_id"] = max_out_deg_id
        statistics["max_out_degree_pkg"] = max_out_deg_pkg

    # pagerank statistics
    output = graph_name + "_topNpagerank.eps"
    if not os.path.isfile(output) or "top_n_pagerank" not in statistics or overwrite:
        print("{0} Computing top 20 nodes with highest pagerank".format(datetime.datetime.now()))
        data_file = graph_name + "_pageranks"
        prank_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file) or overwrite:
            # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
            snap.GetPageRank(graph, prank_hashtable, 0.85)
            fout = snap.TFOut(data_file)
            prank_hashtable.Save(fout)
        else:
            # reuse the previously saved pagerank table
            fin = snap.TFIn(data_file)
            prank_hashtable.Load(fin)
        top_n = get_top_nodes_from_hashtable(prank_hashtable)
        top_n.sort(key=itemgetter(1))  # ascending by score; reversed before storing
        if "top_n_pagerank" not in statistics or overwrite:
            top_n_labeled = []
            for pair in top_n:
                top_n_labeled.append((id_pkg_dict[pair[0]], pair[1]))
            statistics["top_n_pagerank"] = list(reversed(top_n_labeled))
        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 pagerank nodes
            subgraph = get_subgraph(graph, [x[0] for x in top_n])
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(prank_hashtable, [x[0] for x in top_n])
            plot_subgraph_colored(subgraph, labels_dict, values, "PageRank",
                                  "Play Store Graph - top 20 PageRank nodes", output, "autumn_r")

    # betweeness statistics (opt-in: expensive on large graphs)
    output = graph_name + "_topNbetweenness.eps"
    if compute_betweenness and (not os.path.isfile(output) or "betweenness" not in statistics or overwrite):
        print("{0} Computing top 20 nodes with highest betweenness".format(datetime.datetime.now()))
        data_file1 = graph_name + "_node_betweenness"
        data_file2 = graph_name + "_edge_betweenness"
        node_betwenness_hashtable = snap.TIntFltH()
        edge_betwenness_hashtable = snap.TIntPrFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            # NOTE(review): 0.85 is presumably the NodeFrac (approximation) argument — confirm against snap docs
            snap.GetBetweennessCentr(graph, node_betwenness_hashtable, edge_betwenness_hashtable, 0.85, True)
            fout = snap.TFOut(data_file1)
            node_betwenness_hashtable.Save(fout)
            fout = snap.TFOut(data_file2)
            edge_betwenness_hashtable.Save(fout)
        else:
            fin = snap.TFIn(data_file1)
            node_betwenness_hashtable.Load(fin)
            fin = snap.TFIn(data_file2)
            edge_betwenness_hashtable.Load(fin)
        # unused, as now (edge betweenness table is computed/saved but not plotted)
        top_n = get_top_nodes_from_hashtable(node_betwenness_hashtable)
        top_n.sort(key=itemgetter(1))
        if "top_n_betweenness" not in statistics or overwrite:
            top_n_labeled = []
            for pair in top_n:
                top_n_labeled.append((id_pkg_dict[pair[0]], pair[1]))
            statistics["top_n_betweenness"] = list(reversed(top_n_labeled))
        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 betweenness nodes
            subgraph = get_subgraph(graph, [x[0] for x in top_n])
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(node_betwenness_hashtable, [x[0] for x in top_n])
            plot_subgraph_colored(subgraph, labels_dict, values, "Betweenness",
                                  "Play Store Graph - top 20 Betweenness nodes", output)

    # HITS statistics
    output_hub = graph_name + "_topNhitshubs.eps"
    output_auth = graph_name + "_topNhitsauth.eps"
    if not os.path.isfile(output_hub) or not os.path.isfile(output_auth) or "top_n_hits_hubs" not in statistics \
            or "top_n_hits_authorities" not in statistics or overwrite:
        print("{0} Computing top 20 HITS hubs and auths".format(datetime.datetime.now()))
        data_file1 = graph_name + "_hits_hubs"
        data_file2 = graph_name + "_hits_auth"
        hubs_hashtable = snap.TIntFltH()
        auth_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            # MaxIter = 20
            snap.GetHits(graph, hubs_hashtable, auth_hashtable, 20)
            fout = snap.TFOut(data_file1)
            hubs_hashtable.Save(fout)
            fout = snap.TFOut(data_file2)
            auth_hashtable.Save(fout)
        else:
            fin = snap.TFIn(data_file1)
            hubs_hashtable.Load(fin)
            fin = snap.TFIn(data_file2)
            auth_hashtable.Load(fin)
        top_n_hubs = get_top_nodes_from_hashtable(hubs_hashtable)
        top_n_hubs.sort(key=itemgetter(1))
        if "top_n_hits_hubs" not in statistics or overwrite:
            top_n_labeled = []
            for pair in top_n_hubs:
                top_n_labeled.append((id_pkg_dict[pair[0]], pair[1]))
            statistics["top_n_hits_hubs"] = list(reversed(top_n_labeled))
        top_n_auth = get_top_nodes_from_hashtable(auth_hashtable)
        top_n_auth.sort(key=itemgetter(1))
        if "top_n_hits_authorities" not in statistics or overwrite:
            top_n_labeled = []
            for pair in top_n_auth:
                top_n_labeled.append((id_pkg_dict[pair[0]], pair[1]))
            statistics["top_n_hits_authorities"] = list(reversed(top_n_labeled))
        if not os.path.isfile(output_hub) or not os.path.isfile(output_auth) or overwrite:
            nodes_subset = set()
            for pair in top_n_hubs:
                nodes_subset.add(pair[0])
            for pair in top_n_auth:
                nodes_subset.add(pair[0])
            # let's build a subgraph induced on the top N HITS auths and hubs nodes
            subgraph = get_subgraph(graph, nodes_subset)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(hubs_hashtable, nodes_subset)
            values2 = snap_hashtable_to_dict(auth_hashtable, nodes_subset)
            plot_subgraph_colored(subgraph, labels_dict, values, "HITS - Hub Index",
                                  "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities",
                                  output_hub, "bwr")
            plot_subgraph_colored(subgraph, labels_dict, values2, "HITS - Authority Index",
                                  "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities",
                                  output_auth, "bwr_r")

    # indegree histogram
    output = graph_name + "_indegree"
    if not os.path.isfile("inDeg." + output + ".plt") or not os.path.isfile(
            "inDeg." + output + ".tab") or not os.path.isfile("inDeg." + output + ".png") or overwrite:
        print("{0} Computing indegree distribution".format(datetime.datetime.now()))
        snap.PlotInDegDistr(graph, output, "Play Store Graph - in-degree Distribution")

    # outdegree histogram
    output = graph_name + "_outdegree"
    if not os.path.isfile("outDeg." + output + ".plt") or not os.path.isfile(
            "outDeg." + output + ".tab") or not os.path.isfile(
            "outDeg." + output + ".png") or overwrite:
        print("{0} Computing outdegree distribution".format(datetime.datetime.now()))
        snap.PlotOutDegDistr(graph, output, "Play Store Graph - out-degree Distribution")

    # strongly connected components
    print  # NOTE(review): bare name expression — a no-op under Python 3; looks like a Python 2 leftover
    output = graph_name + "_scc"
    if not os.path.isfile("scc." + output + ".plt") or not os.path.isfile(
            "scc." + output + ".tab") or not os.path.isfile("scc." + output + ".png") or overwrite:
        print("{0} Computing scc distribution".format(datetime.datetime.now()))
        snap.PlotSccDistr(graph, output, "Play Store Graph - strongly connected components distribution")

    # weakly connected components
    print  # NOTE(review): bare name expression — a no-op under Python 3
    output = graph_name + "_wcc"
    if not os.path.isfile("wcc." + output + ".plt") or not os.path.isfile(
            "wcc." + output + ".tab") or not os.path.isfile("wcc." + output + ".png") or overwrite:
        print("{0} Computing wcc distribution".format(datetime.datetime.now()))
        snap.PlotWccDistr(graph, output, "Play Store Graph - weakly connected components distribution")

    # clustering coefficient distribution
    output = graph_name + "_cf"
    if not os.path.isfile("ccf." + output + ".plt") or not os.path.isfile(
            "ccf." + output + ".tab") or not os.path.isfile("ccf." + output + ".png") or overwrite:
        print("{0} Computing cf distribution".format(datetime.datetime.now()))
        snap.PlotClustCf(graph, output, "Play Store Graph - clustering coefficient distribution")

    # shortest path distribution
    output = graph_name + "_hops"
    if not os.path.isfile("hop." + output + ".plt") or not os.path.isfile(
            "hop." + output + ".tab") or not os.path.isfile("hop." + output + ".png") or overwrite:
        print("{0} Computing shortest path distribution".format(datetime.datetime.now()))
        snap.PlotHops(graph, output, "Play Store Graph - Cumulative Shortest Paths (hops) distribution", True)

    # k-core edges distribution
    output = graph_name + "_kcore_edges"
    if not os.path.isfile("coreEdges." + output + ".plt") or not os.path.isfile(
            "coreEdges." + output + ".tab") or not os.path.isfile(
            "coreEdges." + output + ".png") or overwrite:
        print("{0} Computing k-core edges distribution".format(datetime.datetime.now()))
        snap.PlotKCoreEdges(graph, output, "Play Store Graph - K-Core edges distribution")

    # k-core nodes distribution
    output = graph_name + "_kcore_nodes"
    if not os.path.isfile("coreNodes." + output + ".plt") or not os.path.isfile(
            "coreNodes." + output + ".tab") or not os.path.isfile(
            "coreNodes." + output + ".png") or overwrite:
        print("{0} Computing k-core nodes distribution".format(datetime.datetime.now()))
        snap.PlotKCoreNodes(graph, output, "Play Store Graph - K-Core nodes distribution")

    # persist the accumulated scalar statistics for incremental future runs
    with open(json_path, 'w') as outfile:
        json.dump(statistics, outfile, indent=2)