def solve_degree_based_questions(G, GName):
    """Print degree statistics for ``G`` and plot its out-degree distribution.

    Prints the number of nodes whose out-degree is 7 and the ids of every
    node that has the highest out-degree, then asks SNAP to plot the
    out-degree distribution.

    :param G: SNAP graph to analyse.
    :param GName: graph file name; its last 10 characters are dropped to
        build the label used in the messages and in the plot file name.
        NOTE(review): presumably this strips a fixed file-name suffix --
        confirm against the caller.
    """
    label = GName[:-10]

    # Histogram of (out-degree, number of nodes) pairs.
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, CntV)

    # Number of nodes with degree seven
    degree_seven_count = 0
    for pair in CntV:
        if pair.GetVal1() == 7:
            degree_seven_count = pair.GetVal2()
            break
    print("Number of nodes with degree=7 in %s: %d" % (label, degree_seven_count))

    # Nodes with maximum degree and their IDs.  The last histogram entry is
    # taken as the largest degree; an empty histogram (graph without nodes)
    # simply yields no ids instead of raising IndexError.
    nodes_with_max_deg = []
    if len(CntV) > 0:
        MaxDegree = CntV[len(CntV) - 1].GetVal1()
        nodes_with_max_deg = [
            str(NI.GetId()) for NI in G.Nodes() if NI.GetOutDeg() == MaxDegree
        ]
    print("Node id (s) with highest degree in {0}: {1}".format(
        label, ",".join(nodes_with_max_deg)))

    # Plots the degree distribution
    filename = "outDeg." + label + ".png"
    snap.PlotOutDegDistr(G, label, label + " - out-degree Distribution")
    print("Degree distribution of {0} is in: {1}".format(label, filename))
def degree_distribution(self):
    """Plot the out-degree distribution of ``self.graph`` and display it.

    SNAP is asked to plot under the name ``Degree_Distribution``; the
    resulting ``outDeg.Degree_Distribution.png`` is then read back (relative
    to the current working directory) and shown in a new matplotlib figure.
    """
    # Single source for the name so the plot call and the image path cannot
    # drift apart.
    plot_name = "Degree_Distribution"
    snap.PlotOutDegDistr(self.graph, plot_name, " Graph Degree Distribution")
    img = mpimg.imread("outDeg." + plot_name + ".png")
    plt.figure()
    plt.imshow(img)
    plt.show()
def partOneAndTwo(WikiG):
    """Print basic statistics of ``WikiG`` and plot its degree distributions.

    Part 1 prints node, self-loop, edge and degree counts.  Part 2 lets SNAP
    write the in-/out-degree histograms under the name ``wiki``, reloads the
    generated ``.tab`` files, fits a degree-1 polynomial to the out-degree
    histogram in log10 space and shows both distributions with matplotlib.

    :param WikiG: SNAP graph to analyse.
    """
    print('1. Number of nodes: '+str(WikiG.GetNodes()))

    selfloop_cnt = sum(
        1 for node in WikiG.Nodes()
        if WikiG.IsEdge(node.GetId(), node.GetId()))
    print('2. Self loop Node: {}'.format(selfloop_cnt))

    cnt_dir = snap.CntUniqDirEdges(WikiG)
    print('3. The number of directed edges: {}'.format(cnt_dir))

    cnt_undir = snap.CntUniqUndirEdges(WikiG)
    print("4. The number of unique undirected edges is %d" % cnt_undir)
    print("5. The number of reciprocated edges is %d" % (cnt_dir - cnt_undir))

    # Each label is now paired with the matching counter; previously the
    # in-degree count was printed as "out-degree" and vice versa.
    cnt_zero_out = snap.CntOutDegNodes(WikiG, 0)
    print("6. The number of nodes of zero out-degree is %d" % cnt_zero_out)
    cnt_zero_in = snap.CntInDegNodes(WikiG, 0)
    print("7. The number of nodes of zero in-degree is %d" % cnt_zero_in)

    cnt_deg_above_10 = 0
    cnt_deg_less_10 = 0
    for node in WikiG.Nodes():
        if node.GetOutDeg() > 10:
            cnt_deg_above_10 += 1
        if node.GetInDeg() < 10:
            cnt_deg_less_10 += 1
    print("8. The number of nodes with more than 10 outgoing edges is %d" % cnt_deg_above_10)
    print("9. The number of nodes with fewer than 10 incoming edges is %d" % cnt_deg_less_10)

    # Part 2
    out_file_name = 'wiki'
    snap.PlotInDegDistr(WikiG, out_file_name, "Directed graph - in-degree Distribution")
    snap.PlotOutDegDistr(WikiG, out_file_name, "Directed graph - out-degree Distribution")

    # Keep only rows with a positive degree so the log transform is defined.
    InDegDistr = np.loadtxt("inDeg."+out_file_name+".tab")
    InDegDistr = InDegDistr[InDegDistr[:, 0] > 0]
    # SNAP names the out-degree table "outDeg.<name>.tab"; the former
    # "OutDeg." spelling is not found on case-sensitive file systems.
    OutDegDistr = np.loadtxt("outDeg."+out_file_name+".tab")
    OutDegDistr = OutDegDistr[OutDegDistr[:, 0] > 0]

    log_out = np.log10(OutDegDistr)
    coff = np.polyfit(log_out[:, 0], log_out[:, 1], 1)
    print(coff)

    plt.figure()
    plt.subplot(211)
    plt.loglog(InDegDistr[:, 0], InDegDistr[:, 1])
    plt.title('In deg Distr')
    plt.subplot(212)
    plt.loglog(OutDegDistr[:, 0], OutDegDistr[:, 1])
    # Fitted line mapped back from log space: y = 10**intercept * x**slope.
    plt.loglog(OutDegDistr[:, 0],
               np.power(10, coff[1])*np.power(OutDegDistr[:, 0], coff[0]))
    plt.title('Out deg Distr & Last-Square Reg Line in log-log plot')
    plt.show()
def plot_degree_distribution(ei_graph, name):
    """Plot the out-degree distribution of a graph and save it to file.

    :param ei_graph: object whose ``base()`` result is the graph handed to
        SNAP.
    :param name: used to create the output filename, `outDeg.name.plt`, and
        in the title of the plot.

    https://snap.stanford.edu/snappy/doc/reference/PlotOutDegDistr.html
    """
    plot_title = 'Degree Distribution for ' + name
    base_graph = ei_graph.base()
    snap.PlotOutDegDistr(base_graph, name, plot_title)
def plotOutDegDistr(graph):
    """Plot the out-degree distribution into ``temp/`` and return the PNG path.

    The plot is produced while the working directory is temporarily switched
    to ``temp`` (which must already exist); the original working directory is
    restored afterwards, even when plotting fails.

    :param graph: SNAP graph to plot.
    :returns: absolute path of ``temp/outDeg.out_deg_distr.png``.
    :raises OSError: if the ``temp`` directory cannot be entered.
    """
    outdir = 'temp'
    fileName = 'out_deg_distr'

    # Resolve everything against the starting directory up front instead of
    # relying on a later chdir('..') to get back to it.
    start_dir = os.getcwd()
    out_fname = os.path.join(start_dir, outdir, 'outDeg.' + fileName + '.png')

    os.chdir(outdir)
    try:
        snap.PlotOutDegDistr(graph, fileName, "Out Degree Distribution")
    finally:
        os.chdir(start_dir)
    return os.path.abspath(out_fname)
def outDistPlot(graph):
    """Plot the out-degree distribution of ``graph`` with matplotlib.

    SNAP first writes the histogram under the name ``example``; the table is
    then read from ``/content/outDeg.example.tab``, skipping its first four
    lines, and the two leading integer columns are plotted against each
    other.

    :param graph: SNAP graph to plot.
    """
    snap.PlotOutDegDistr(graph, "example", "Directed graph - out-degree Distribution")

    X, Y = [], []
    # ``with`` closes the table even if a row fails to parse.
    with open('/content/outDeg.example.tab', 'r') as tab_file:
        for line_no, line in enumerate(tab_file):
            if line_no < 4:
                # The first four lines are not data rows.
                continue
            values = [int(s) for s in line.split()]
            if not values:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            X.append(values[0])
            Y.append(values[1])

    plt.plot(X, Y)
    plt.title("dist of outDeg of nodes")
    plt.show()
def degree_distribution_plot(G):
    """
    Saves the degree distribution plot of the subgraph G

    The subgraph name is taken from the last command-line argument.  The
    file is saved in the directory './plots/deg_dist_<subgraph_name>.png';
    the ``.plt`` and ``.tab`` files SNAP writes next to the image are
    removed.

    :param G: SNAP graph to plot.
    """
    name = sys.argv[-1]
    snap.PlotOutDegDistr(G, name, f"Degree Distribution in {name}")

    # An existing directory is fine; any other failure (e.g. permissions) is
    # reported instead of being silently swallowed.
    os.makedirs('./plots', exist_ok=True)

    os.rename(f'outDeg.{name}.png', f'./plots/deg_dist_{name}.png')
    os.remove(f'outDeg.{name}.plt')
    os.remove(f'outDeg.{name}.tab')
def genGraphInfo(self):
    """Print summary statistics of ``self.G`` and generate its SNAP plots.

    Reports node and edge counts, the id(s) of the node(s) with the highest
    out-degree and the value of ``snap.GetMxSccSz``, and plots the degree,
    shortest-path and component-size distributions.  All messages and plot
    file names are labelled with ``self.graphName``.
    """
    graphName = self.graphName

    # get the number of nodes and edges in the graph
    print("Number of nodes in %s: %d" % (graphName, self.G.GetNodes()))
    print("Number of edges in %s: %d" % (graphName, self.G.GetEdges()))

    # get the node id(s) with highest degree: look the reference node up
    # directly rather than scanning every node for its id
    nodeIdMaxDegree = snap.GetMxOutDegNId(self.G)
    maxDegree = self.G.GetNI(nodeIdMaxDegree).GetOutDeg()
    nodeIdsMaxDegree = [
        str(node.GetId()) for node in self.G.Nodes()
        if node.GetOutDeg() == maxDegree
    ]
    # join() avoids the dangling trailing comma of manual concatenation
    print("Node id(s) with highest degree in %s: %s" %
          (graphName, ",".join(nodeIdsMaxDegree)))

    # plot degree distribution
    snap.PlotOutDegDistr(self.G, graphName, "Degree Distribution")
    degreeFileName = "outDeg." + graphName + ".png"
    print("Degree distribution of %s is in: %s" % (graphName, degreeFileName))

    # plot shortest path distribution
    snap.PlotShortPathDistr(self.G, graphName, "Shortest Path Distribution")
    shortestPathFileName = "diam." + graphName + ".png"
    print("Shortest path distribution of %s is in: %s" %
          (graphName, shortestPathFileName))

    # get the fraction of nodes in largest cc
    print("Fraction of nodes in largest connected component in %s: %f" %
          (graphName, snap.GetMxSccSz(self.G)))

    # plot the component size distribution
    snap.PlotSccDistr(self.G, graphName, "Component size distribution")
    sccFileName = "scc." + graphName + ".png"
    print("Component size distribution of %s is in: %s" % (graphName, sccFileName))
def print_statistics(self, outfile_name):
    """Write graph statistics to ``outfile_name`` and plot degree distributions.

    SNAP's ``PrintInfo`` output goes to the file, followed by a
    ``####More information`` marker.  Every key of ``self.ids`` whose value
    equals the id of the maximum-degree node is printed, and the out- and
    in-degree distributions of ``self.Graph`` are plotted.

    :param outfile_name: path of the statistics file to write/append to.
    """
    # Single-argument print() calls behave identically on Python 2 and 3.
    print('Writing to file: {0}'.format(outfile_name))
    snap.PrintInfo(self.Graph, 'Python type TUNGraph', outfile_name, False)
    with open(outfile_name, 'a') as f:
        f.write('\n####More information')

    max_degree_node = snap.GetMxDegNId(self.Graph)
    for artist_id, node_id in self.ids.items():
        if node_id == max_degree_node:
            print(artist_id)

    # These may throw gnuplot errors; if so, edit the generated .plt files to
    # correct the errors and run gnuplot from terminal. (May need to set
    # terminal to svg instead of png depending on your gnuplot installation.)
    snap.PlotOutDegDistr(self.Graph, 'out_degree_distr', 'Out-degree distribution')
    snap.PlotInDegDistr(self.Graph, 'in_degree_distr', 'In-degree distribution')
def wikiVotingNetwork(edge_list_path="Wiki-Vote.txt"):
    """Print statistics of a directed vote network and plot its out-degrees.

    Loads the edge list as a ``snap.PNGraph`` and prints node/edge counts,
    self-edges, zero in-/out-degree node counts, the maximum-degree node id
    and the size distributions of the strongly and weakly connected
    components.  Finally SNAP plots the out-degree distribution under the
    name ``wiki-analysis``.

    :param edge_list_path: edge-list file to load; defaults to the
        ``Wiki-Vote.txt`` file the function originally hard-coded.
    """
    def _report(label, value):
        # Same spacing as a two-item Python 2 ``print`` statement, but valid
        # on Python 3 as well.
        print("%s %s" % (label, value))

    # Loading the graph
    Wiki = snap.LoadEdgeList(snap.PNGraph, edge_list_path, 0, 1)

    _report("Number of Nodes: ", Wiki.GetNodes())
    _report("Number of Edges: ", Wiki.GetEdges())
    _report("Number of Directed Edges: ", snap.CntUniqDirEdges(Wiki))
    _report("Number of Undirected Edges: ", snap.CntUniqUndirEdges(Wiki))
    _report("Number of Self-Edges: ", snap.CntSelfEdges(Wiki))
    _report("Number of Zero InDeg Nodes: ", snap.CntInDegNodes(Wiki, 0))
    _report("Number of Zero OutDeg Nodes: ", snap.CntOutDegNodes(Wiki, 0))
    _report("Node ID with maximum degree: ", snap.GetMxDegNId(Wiki))

    # Strongly connected components: (size, count) pairs.  A separate vector
    # per distribution keeps the two results from sharing state.
    scc_sizes = snap.TIntPrV()
    snap.GetSccSzCnt(Wiki, scc_sizes)
    for comp in scc_sizes:
        print("Size: %d - Number of Strongly Connected Components: %d" %
              (comp.GetVal1(), comp.GetVal2()))
    _report("Size of largest connected component: ", snap.GetMxSccSz(Wiki))

    # Weakly connected components: (size, count) pairs.
    wcc_sizes = snap.TIntPrV()
    snap.GetWccSzCnt(Wiki, wcc_sizes)
    for comp in wcc_sizes:
        print("Size: %d - Number of Weekly Connected Component Wikipedia: %d" %
              (comp.GetVal1(), comp.GetVal2()))
    _report("Size of Weakly connected component: ", snap.GetMxWccSz(Wiki))

    # Plotting the out-degree distribution
    snap.PlotOutDegDistr(Wiki, "wiki-analysis",
                         "Directed graph - Out-Degree Distribution")
## The Maximum Degree
# Degree of the node SNAP reports as having the maximum degree.
MxDeg = graph.GetNI(sn.GetMxDegNId(graph)).GetDeg()
print("Node id(s) with highest degree: ", end="")
# Print every node that shares the maximum degree, comma-separated.
flag = True
for node in graph.Nodes():
    if node.GetDeg() == MxDeg:
        if flag:
            print(node.GetId(), end="")
            flag = False
        else:
            # GetId must be *called*; formatting the bare attribute printed
            # the bound-method object instead of the node id.
            print(", {}".format(node.GetId()), end="")
print()

## Plot of degrees
sn.PlotOutDegDistr(graph, name, "Degree Distribution")
plotRemove("outDeg", "deg_dist", name)

# Question 3
numNodes = [10, 100, 1000]

## Full diameter
# One BFS-based estimate per sample size in numNodes.
fullDia = [sn.GetBfsFullDiam(graph, tNodes) for tNodes in numNodes]
for i in range(3):
    print("Approximate full diameter by sampling {} nodes: {}".format(
        numNodes[i], fullDia[i]))
print(
    "Approximate full diameter (mean and variance): {:.4f} {:.4f}".format(
        np.mean(fullDia), np.var(fullDia)))

## Effective Diameter
def _tidy_plot_files(plots_dir):
    """Delete non-PNG files in ``plots_dir`` and rename SNAP plots descriptively."""
    prefix_by_kind = {
        "ccf": "clustering_coeff_",
        "outDeg": "deg_dist_",
        "diam": "shortest_path_",
        "scc": "connected_comp_",
    }
    for entry in os.listdir(plots_dir):
        if not entry.endswith(".png"):
            os.remove(os.path.join(plots_dir, entry))
    for entry in os.listdir(plots_dir):
        parts = entry.split(".")
        if len(parts) < 3:
            # Already renamed (``<prefix><name>.png``) or not a SNAP plot.
            continue
        # Resolve the prefix per file; an unknown kind falls back to
        # "filename" instead of inheriting the previous file's prefix.
        prefix = prefix_by_kind.get(parts[0], "filename")
        os.rename(os.path.join(plots_dir, entry),
                  os.path.join(plots_dir, prefix + parts[1] + "." + parts[2]))


def main():
    """Analyse the subgraph named on the command line and write its plots.

    ``sys.argv[1]`` is the edge-list file inside ``./subgraphs``.  The
    function prints size, degree, diameter, connectivity and clustering
    statistics, then generates the SNAP distribution plots inside
    ``./plots``, removes the non-PNG by-products there and renames the
    images.  The working directory is restored before returning, even if
    plotting fails.
    """
    parentDir = os.getcwd()
    graph_file = sys.argv[1]
    sub_graph = snap.LoadEdgeList(
        snap.PUNGraph, os.path.join(parentDir, "subgraphs", graph_file), 0, 1)
    subGraphName = graph_file.split(".")[0]

    #### 1 ########
    printWithOutNewLine("Number of nodes:", sub_graph.GetNodes())
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))
    # SNAP returns one maximum-degree node; use its degree to collect all
    # nodes that tie with it.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = 0
    for pair in nodeDegPairs:
        if pair.GetVal1() == rndMaxDegNId:
            maxDegVal = pair.GetVal2()
            break
    maxDegNodes = [pair.GetVal1() for pair in nodeDegPairs
                   if pair.GetVal2() == maxDegVal]
    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sample_sizes = (10, 100, 1000)

    sampledFullDiam = [snap.GetBfsFullDiam(sub_graph, size, False)
                       for size in sample_sizes]
    sampledFullDiamStats = [
        round(statistics.mean(sampledFullDiam), 4),
        round(statistics.variance(sampledFullDiam), 4),
    ]
    for size, diam in zip(sample_sizes, sampledFullDiam):
        printWithOutNewLine(
            "Approximate full diameter by sampling %d nodes:" % size, diam)
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    sampledEffDiam = [round(snap.GetBfsEffDiam(sub_graph, size, False), 4)
                      for size in sample_sizes]
    sampledEffDiamStats = [
        round(statistics.mean(sampledEffDiam), 4),
        round(statistics.variance(sampledEffDiam), 4),
    ]
    for size, diam in zip(sample_sizes, sampledEffDiam):
        printWithOutNewLine(
            "Approximate effective diameter by sampling %d nodes:" % size, diam)
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))
    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))
    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)
    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))
    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))
    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    plotsDir = os.path.join(parentDir, "plots")
    if not os.path.isdir(plotsDir):
        os.makedirs(plotsDir)
    # The plots are generated with ./plots as the working directory.
    os.chdir(plotsDir)
    try:
        snap.PlotOutDegDistr(sub_graph, subGraphName,
                             subGraphName + " Subgraph Degree Distribution")
        snap.PlotShortPathDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Shortest Path Lengths Distribution")
        snap.PlotSccDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Connected Components Size Distribution")
        snap.PlotClustCf(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Clustering Coefficient Distribution")
        _tidy_plot_files(plotsDir)
    finally:
        os.chdir(parentDir)
# Script: load the Wiki-Vote edge list as a snap.PNGraph, write SNAP's summary
# to votes-info.txt and print component statistics.
G = snap.LoadEdgeList(snap.PNGraph, "Wiki-Vote.txt", 0, 1)
snap.PrintInfo(G, "votes Stats", "votes-info.txt", False)

# Node ID with maximum degree
NId1 = snap.GetMxDegNId(G)
print("Node ID with Maximum-Degree: %d" % NId1)

# Number of Strongly connected components
# ComponentDist receives pairs; each pair is printed below as
# (component size, number of components of that size).
ComponentDist = snap.TIntPrV()
snap.GetSccSzCnt(G, ComponentDist)
for comp in ComponentDist:
    print("Size: %d - Number of Components: %d" %
          (comp.GetVal1(), comp.GetVal2()))

# Size of largest strongly connected component
print("Strongly Connected Component - Maximum size:", snap.GetMxSccSz(G))

# Number of Weakly Connected Components
# Same pair layout as above, filled by the weakly-connected variant.
CompDist = snap.TIntPrV()
snap.GetWccSzCnt(G, CompDist)
for comp in CompDist:
    print("Size: %d - Number of Components: %d" %
          (comp.GetVal1(), comp.GetVal2()))

# Size of largest weakly connected component
print("Weakly Connected Component - Maximum size:", snap.GetMxWccSz(G))

# Plot of Outdegree Distribution
# NOTE(review): the plot name "Wiki Votes" contains a space and presumably
# ends up in the generated file names -- confirm this is intended.
snap.PlotOutDegDistr(G, "Wiki Votes", "Wiki-Votes Out Degree")
# Collect the positions in the degree sequence whose value equals the first
# component of the last pair in CntV4.
# NOTE(review): CntV4 is defined outside this excerpt; presumably it is a
# sorted (degree, count) histogram so its last pair holds the maximum degree
# -- confirm.
Mx_degree_id = []
result_degree = snap.TIntV()
snap.GetDegSeqV(p2p_gnutella04_subgraph, result_degree)
for i in range(0, result_degree.Len()):
    if (result_degree[i] == CntV4[CntV4.Len() - 1].GetVal1()):
        # NOTE(review): ``i`` is an index into the degree sequence, which is
        # reported below as a node id -- verify that the two coincide.
        Mx_degree_id.append(i)
# NOTE(review): the degrees come from p2p_gnutella04_subgraph but the message
# names email-Enron-subgraph -- confirm which one is intended.
print "Node id(s) with highest degree in email-Enron-subgraph: " + str(
    Mx_degree_id)

# Task 1.2.2.3
# For the selected subgraph, plot its degree distribution and print the name
# of the resulting PNG.
if (sub_graph_name == "soc-Epinions1-subgraph"):
    # Plotting the degree distribution
    snap.PlotOutDegDistr(soc_epinions1_subgraph, "soc-Epinions1-subgraph",
                         "Undirected graph degree Distribution")
    print "Degree distribution of soc-Epinions1-subgraph: " + "outDeg.soc-Epinions1-subgraph.png"
if (sub_graph_name == "cit-HepPh-subgraph"):
    # Plotting the degree distribution
    snap.PlotOutDegDistr(cit_heph_subgraph, "cit-HepPh-subgraph",
                         "Undirected graph degree Distribution")
    print "Degree distribution of cit-HepPh-subgraph: " + "outDeg.cit-HepPh-subgraph.png"
if (sub_graph_name == "email-Enron-subgraph"):
    # Plotting the degree distribution
    snap.PlotOutDegDistr(email_enron_subgraph, "email-Enron-subgraph",
                         "Undirected graph degree Distribution")
    print "Degree distribution of email-Enron-subgraph: " + "outDeg.email-Enron-subgraph.png"
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression

# Edge-list file loaded by the script below.
DATA_PATH = './Wiki-Vote.txt'

if __name__ == '__main__':
    # Build Wiki Graph
    G1 = snap.LoadEdgeList(snap.PNGraph, DATA_PATH, 0, 1)

    # Let Snap.py write its own plot files; the result is not displayed here.
    snap.PlotOutDegDistr(G1, 'Wiki', 'Wiki')

    # Build the distribution by hand instead: DegToCntV is filled with pairs
    # whose first value is stored as the out-degree and whose second value is
    # stored as the count.
    DegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(G1, DegToCntV)
    out_deg = []
    deg_cnt = []
    for item in DegToCntV:
        deg_cnt.append(item.GetVal2())
        out_deg.append(item.GetVal1())
    out_deg_dis = pd.DataFrame({'Out_Degree_Value': out_deg,
                                "Out_Degree_Cnt": deg_cnt})
    # Drop the row labelled 0.
    # NOTE(review): presumably this is the out-degree-0 row, removed before a
    # log-scale plot or fit -- confirm.
    out_deg_dis.drop(index=0, inplace=True)
def out_deg_distribution(graph, fig_name):
    """Have SNAP plot the out-degree distribution of ``graph``.

    :param graph: SNAP graph whose node out-degrees are plotted.
    :param fig_name: name passed to SNAP for the generated plot.
    """
    plot_title = "Distribution of out-degrees of nodes"
    snap.PlotOutDegDistr(graph, fig_name, plot_title)
# Build the directed graph GI: one node per entry of ListOfUsers[0], plus one
# node per entry of Interests[0] with ids starting at 10000.  S maps each of
# those offset ids to its interest value.
S = snap.TIntStrH()
GI = snap.TNGraph.New()
for i in range(len(ListOfUsers[0])):
    GI.AddNode(ListOfUsers[0][i])
print "interests= ", len(Interests[0])
for i in range(len(Interests[0])):
    S.AddDat((10000 + i), Interests[0][i])
    GI.AddNode(10000 + i)
# NOTE(review): the loop length comes from followerFile while the rows are
# read from interestFile -- confirm the two are meant to have equal length.
for i in range(len(followerFile)):
    GI.AddEdge(interestFile.iloc[i, 0], getval(S, interestFile.iloc[i, 1]))

# NOTE(review): everything below operates on G1 (defined outside this
# excerpt), not on the GI graph built above -- confirm that is intended.
snap.DrawGViz(G1, snap.gvlDot, "reco.png", "Network Diagram", True,
              snap.TIntStrH())
snap.PlotInDegDistr(G1, "Indeg", "Directed graph - in-degree")
snap.PlotOutDegDistr(G1, "Outdeg", "Directed graph - out-degree")

# vector of pairs of integers (size, count)
ComponentDist = snap.TIntPrV()
# get distribution of connected components (component size, count)
snap.GetWccSzCnt(G1, ComponentDist)
for comp in ComponentDist:
    print "Size: %d - Number of Components: %d" % (comp.GetVal1(),
                                                   comp.GetVal2())

Count = snap.CntUniqDirEdges(G1)
print "Directed Graph: Count of unique directed edges is %d" % Count

# get degree distribution pairs (degree, count); the ComponentDist vector is
# reused as the output container
snap.GetOutDegCnt(G1, ComponentDist)
print "Degree Distribution Pairs-"
xval = []
# Plot. fig = plt.plot(data['degree'], data['count'], 'bo--', markersize=2)[0] fig.axes.set_xscale('log') fig.axes.set_yscale('log') fig.axes.set_xlim(data['degree'].min(), data['degree'].max()) fig.axes.set_ylim(data['count'].min(), data['count'].max()) fig.axes.set_title("Log-Log Degree Distribution Plot for WikiGraph") fig.axes.set_xlabel("Node Degree") fig.axes.set_ylabel("Node Count") # Save image. plt.savefig("WikiGraphOutDegreeDistribution", format='svg', dpi=600) plt.savefig("WikiGraphOutDegreeDistribution", dpi=600) # Alternative 2.1. snap.PlotOutDegDistr(wikiGraph, "WikiGraph", "WikiGraph - Out Degree Distribution") # 2.2: Compute and plot the least-square regression line. # Calculate the best fit line on the log data. slope, intercept = np.polyfit(np.log10(data['degree']), np.log10(data['count']), FIT_DEGREE) predict = lambda x: 10**(intercept) * x**slope # Plot. fig = plt.plot(data['degree'], data['count'], 'bo--', data['degree'], predict(data['degree']), 'g', markersize=2)[0]
# Graph Testing
# ----------------------------------
# The code below refers to the module both as ``snap`` and as ``s``; import
# it under both names so neither reference raises NameError.
import snap
import snap as s
import random as rand

# Generate undirected Erdos Reyni random graph
# set up vertices and edges
vertices = 20
edges = 15
u_rndm_graph = snap.GenRndGnm(snap.PUNGraph, vertices, edges)

# Draw the graph to a plot, counting vertices
snap.DrawGViz(u_rndm_graph, snap.gvlNeato, "graph_rdm_undirected.png",
              "Undirected Random Graph", True)

# Plot the out degree distrib
snap.PlotOutDegDistr(u_rndm_graph, "graph_rdm_undirected",
                     "Undirected graph - out-degree Distribution")

# Compute and print the list of all edges
for vertex_in in u_rndm_graph.Nodes():
    for vertex_out_id in vertex_in.GetOutEdges():
        # Single-argument print() behaves the same on Python 2 and 3.
        print("edge (%d %d)" % (vertex_in.GetId(), vertex_out_id))

# Save it to an external file
snap.SaveEdgeList(u_rndm_graph, "Rndm_graph.txt",
                  "Save as tab-separated list of edges")

# Compute degree distribution and save it to an external textfile
degree_vertex_count = snap.TIntPrV()
s.GetOutDegCnt(u_rndm_graph, degree_vertex_count)
file = open("graph_rdm_undirected_degree_distrib.txt", "w")
file.write("#----------------------------------\n")
file.write("# Degree Distribution \n")
def _plot_files_missing(prefix, output):
    """Return True when any of the .plt/.tab/.png files of a SNAP plot is absent."""
    return not all(
        os.path.isfile(prefix + "." + output + extension)
        for extension in (".plt", ".tab", ".png"))


def _label_top_nodes(top_n, id_pkg_dict):
    """Map (node id, score) pairs to (package, score) pairs in reversed order."""
    return [(id_pkg_dict[pair[0]], pair[1]) for pair in reversed(top_n)]


def _collect_graph_statistics(graph, graph_name, directory, id_pkg_dict,
                              statistics, overwrite, compute_betweenness):
    """Fill ``statistics`` and write data/plot files into the working directory."""
    # general statistics
    output = os.path.join(directory, graph_name + "_main_statistics.txt")
    if not os.path.isfile(output) or overwrite:
        print("{0} Computing general statistics".format(datetime.datetime.now()))
        snap.PrintInfo(graph, "Play Store Graph -- main statistics", output, False)

    # info about the nodes with the max in degree
    if "max_in_degree" not in statistics or overwrite:
        print("{0} Computing max indegree".format(datetime.datetime.now()))
        max_in_deg_id = snap.GetMxInDegNId(graph)
        statistics["max_in_degree"] = graph.GetNI(max_in_deg_id).GetInDeg()
        statistics["max_in_degree_id"] = max_in_deg_id
        statistics["max_in_degree_pkg"] = graph.GetStrAttrDatN(max_in_deg_id, "pkg")

    # info about the nodes with the max out degree
    if "max_out_degree" not in statistics or overwrite:
        print("{0} Computing max outdegree".format(datetime.datetime.now()))
        max_out_deg_id = snap.GetMxOutDegNId(graph)
        statistics["max_out_degree"] = graph.GetNI(max_out_deg_id).GetOutDeg()
        statistics["max_out_degree_id"] = max_out_deg_id
        statistics["max_out_degree_pkg"] = graph.GetStrAttrDatN(max_out_deg_id, "pkg")

    # pagerank statistics
    output = graph_name + "_topNpagerank.eps"
    if not os.path.isfile(output) or "top_n_pagerank" not in statistics or overwrite:
        print("{0} Computing top 20 nodes with highest pagerank".format(
            datetime.datetime.now()))
        data_file = graph_name + "_pageranks"
        prank_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file) or overwrite:
            # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
            snap.GetPageRank(graph, prank_hashtable, 0.85)
            fout = snap.TFOut(data_file)
            prank_hashtable.Save(fout)
        else:
            # Reuse the scores cached by an earlier run.
            fin = snap.TFIn(data_file)
            prank_hashtable.Load(fin)

        top_n = get_top_nodes_from_hashtable(prank_hashtable)
        top_n.sort(key=itemgetter(1))
        if "top_n_pagerank" not in statistics or overwrite:
            statistics["top_n_pagerank"] = _label_top_nodes(top_n, id_pkg_dict)

        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 pagerank nodes
            subgraph = get_subgraph(graph, [x[0] for x in top_n])
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(prank_hashtable, [x[0] for x in top_n])
            plot_subgraph_colored(subgraph, labels_dict, values, "PageRank",
                                  "Play Store Graph - top 20 PageRank nodes",
                                  output, "autumn_r")

    # betweeness statistics
    output = graph_name + "_topNbetweenness.eps"
    # The stored key is "top_n_betweenness"; testing for "betweenness" (as the
    # code did before) was always true and defeated the cache check.
    if compute_betweenness and (not os.path.isfile(output)
                                or "top_n_betweenness" not in statistics
                                or overwrite):
        print("{0} Computing top 20 nodes with highest betweenness".format(
            datetime.datetime.now()))
        data_file1 = graph_name + "_node_betweenness"
        data_file2 = graph_name + "_edge_betweenness"
        node_betwenness_hashtable = snap.TIntFltH()
        edge_betwenness_hashtable = snap.TIntPrFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            snap.GetBetweennessCentr(graph, node_betwenness_hashtable,
                                     edge_betwenness_hashtable, 0.85, True)
            fout = snap.TFOut(data_file1)
            node_betwenness_hashtable.Save(fout)
            fout = snap.TFOut(data_file2)
            edge_betwenness_hashtable.Save(fout)
        else:
            fin = snap.TFIn(data_file1)
            node_betwenness_hashtable.Load(fin)
            fin = snap.TFIn(data_file2)
            edge_betwenness_hashtable.Load(fin)  # unused, as now

        top_n = get_top_nodes_from_hashtable(node_betwenness_hashtable)
        top_n.sort(key=itemgetter(1))
        if "top_n_betweenness" not in statistics or overwrite:
            statistics["top_n_betweenness"] = _label_top_nodes(top_n, id_pkg_dict)

        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 betweenness nodes
            subgraph = get_subgraph(graph, [x[0] for x in top_n])
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(node_betwenness_hashtable,
                                            [x[0] for x in top_n])
            plot_subgraph_colored(subgraph, labels_dict, values, "Betweenness",
                                  "Play Store Graph - top 20 Betweenness nodes",
                                  output)

    # HITS statistics
    output_hub = graph_name + "_topNhitshubs.eps"
    output_auth = graph_name + "_topNhitsauth.eps"
    if not os.path.isfile(output_hub) or not os.path.isfile(output_auth) \
            or "top_n_hits_hubs" not in statistics \
            or "top_n_hits_authorities" not in statistics or overwrite:
        print("{0} Computing top 20 HITS hubs and auths".format(
            datetime.datetime.now()))
        data_file1 = graph_name + "_hits_hubs"
        data_file2 = graph_name + "_hits_auth"
        hubs_hashtable = snap.TIntFltH()
        auth_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            # MaxIter = 20
            snap.GetHits(graph, hubs_hashtable, auth_hashtable, 20)
            fout = snap.TFOut(data_file1)
            hubs_hashtable.Save(fout)
            fout = snap.TFOut(data_file2)
            auth_hashtable.Save(fout)
        else:
            fin = snap.TFIn(data_file1)
            hubs_hashtable.Load(fin)
            fin = snap.TFIn(data_file2)
            auth_hashtable.Load(fin)

        top_n_hubs = get_top_nodes_from_hashtable(hubs_hashtable)
        top_n_hubs.sort(key=itemgetter(1))
        if "top_n_hits_hubs" not in statistics or overwrite:
            statistics["top_n_hits_hubs"] = _label_top_nodes(top_n_hubs, id_pkg_dict)

        top_n_auth = get_top_nodes_from_hashtable(auth_hashtable)
        top_n_auth.sort(key=itemgetter(1))
        if "top_n_hits_authorities" not in statistics or overwrite:
            statistics["top_n_hits_authorities"] = _label_top_nodes(
                top_n_auth, id_pkg_dict)

        if not os.path.isfile(output_hub) or not os.path.isfile(output_auth) or overwrite:
            nodes_subset = set()
            for pair in top_n_hubs:
                nodes_subset.add(pair[0])
            for pair in top_n_auth:
                nodes_subset.add(pair[0])

            # let's build a subgraph induced on the top N HITS auths and hubs nodes
            subgraph = get_subgraph(graph, nodes_subset)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(hubs_hashtable, nodes_subset)
            values2 = snap_hashtable_to_dict(auth_hashtable, nodes_subset)
            plot_subgraph_colored(
                subgraph, labels_dict, values, "HITS - Hub Index",
                "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities",
                output_hub, "bwr")
            plot_subgraph_colored(
                subgraph, labels_dict, values2, "HITS - Authority Index",
                "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities",
                output_auth, "bwr_r")

    # Distribution plots.  Each entry: (file prefix written by SNAP, suffix
    # appended to the graph name, wording of the progress message, SNAP plot
    # function, plot title, extra positional arguments).
    distribution_plots = [
        ("inDeg", "_indegree", "indegree", snap.PlotInDegDistr,
         "Play Store Graph - in-degree Distribution", ()),
        ("outDeg", "_outdegree", "outdegree", snap.PlotOutDegDistr,
         "Play Store Graph - out-degree Distribution", ()),
        ("scc", "_scc", "scc", snap.PlotSccDistr,
         "Play Store Graph - strongly connected components distribution", ()),
        ("wcc", "_wcc", "wcc", snap.PlotWccDistr,
         "Play Store Graph - weakly connected components distribution", ()),
        ("ccf", "_cf", "cf", snap.PlotClustCf,
         "Play Store Graph - clustering coefficient distribution", ()),
        ("hop", "_hops", "shortest path", snap.PlotHops,
         "Play Store Graph - Cumulative Shortest Paths (hops) distribution",
         (True,)),
        ("coreEdges", "_kcore_edges", "k-core edges", snap.PlotKCoreEdges,
         "Play Store Graph - K-Core edges distribution", ()),
        ("coreNodes", "_kcore_nodes", "k-core nodes", snap.PlotKCoreNodes,
         "Play Store Graph - K-Core nodes distribution", ()),
    ]
    for prefix, suffix, what, plot_fn, title, extra_args in distribution_plots:
        output = graph_name + suffix
        if _plot_files_missing(prefix, output) or overwrite:
            print("{0} Computing {1} distribution".format(
                datetime.datetime.now(), what))
            plot_fn(graph, output, title, *extra_args)


def compute_graph_statistics(graph_path, overwrite, compute_betweenness=False):
    """Compute statistics and plots for a saved graph, caching the results.

    The graph is loaded from ``graph_path`` as a ``snap.TNEANet``.  Numeric
    results are merged into ``<graph name>_statistics.json`` next to the
    graph file; score tables and plots are written into the same directory.
    Work whose output already exists is skipped unless ``overwrite`` is set.

    The working directory is switched to the graph's directory while the
    statistics are computed and is restored afterwards, so repeated calls
    with relative paths keep resolving against the caller's directory.

    :param graph_path: path of the saved graph; a ``.graph`` substring is
        removed from the file name to form the graph name.
    :param overwrite: when true, recompute everything and replace existing
        outputs.
    :param compute_betweenness: when true, also compute betweenness
        statistics.
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    directory = os.path.dirname(graph_abs_path)

    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)

    # rebuild the id => pkg dictionary
    id_pkg_dict = {}
    for node in graph.Nodes():
        id_pkg_dict[node.GetId()] = graph.GetStrAttrDatN(node.GetId(), "pkg")

    # Start from the previously saved statistics, if any.
    json_path = os.path.join(directory, graph_name + "_statistics.json")
    if os.path.isfile(json_path):
        with open(json_path, "r") as f:
            statistics = json.load(f, object_pairs_hook=OrderedDict)
    else:
        statistics = OrderedDict()

    # snap.py doesn't suport absolute paths for some operations. Let's cd to
    # the directory -- and always go back afterwards.
    previous_cwd = os.getcwd()
    os.chdir(directory)
    try:
        _collect_graph_statistics(graph, graph_name, directory, id_pkg_dict,
                                  statistics, overwrite, compute_betweenness)
    finally:
        os.chdir(previous_cwd)

    with open(json_path, 'w') as outfile:
        json.dump(statistics, outfile, indent=2)
# First pass: find the largest out-degree in UGraph.
maxNodes = []
maxDegree = 0
for node in UGraph.Nodes():
    if (node.GetOutDeg()) > maxDegree:
        maxDegree = (node.GetOutDeg())
# Second pass: collect every node id that has that degree.
for node in UGraph.Nodes():
    if (node.GetOutDeg()) == maxDegree:
        maxNodes.append(node.GetId())
print "Node id(s) with the highest degree in %s: %s\n" % (file, maxNodes)

# c) creates a plot of the out-degree distribution
# NOTE(review): plotFN is passed to SNAP as the plot *name*; SNAP presumably
# adds its own "outDeg." prefix and extension, so the file reported below may
# not match the file actually written -- confirm.
plotFN = file + ".outDeg.Distribution-plot.png"
snap.PlotOutDegDistr(UGraph, plotFN,
                     "Undirected graph degree distribution for file " + file)
print "\nDegree distribution of %s is in: %s\n" % (file, plotFN)

# 3) Paths in the network:
print "Paths in the network:\n"
# a) approximate full diameter of the graph


# function to calculate the mean
def mean(data):
    return sum(data) / len(data)


# function to calculate the variance
def variance(data):
    n = len(data)
return G Gtrain=generateGraph(train) Gval=generateGraph(trueValidation) Gtest=generateGraph(trueTest) snap.PlotSccDistr(Gtest, "destribution_gtest", "G_{test}") snap.PlotSccDistr(Gtest, "destribution_gtrain", "G_{train}") snap.PlotSccDistr(Gtest, "destribution_gval", "G_{val}") snap.PlotOutDegDistr(Gtest, "degree_gtest", "G_{test}", False, True) snap.PlotOutDegDistr(Gtrain, "degree_gtrain", "G_{train}", False, True) snap.PlotOutDegDistr(Gval, "degree_gval", "G_{val}", False, True) import sys def extractFeatures(G, data, N): ''' Returns dictionary of features for (u,v)''' results={} user_to_rating={u_to_id[row.user_id]: row.stars for _, row in data.iterrows()} print "Finished star for user" sys.stdout.flush()
def main(): Component = snap.TIntPrV() #loading the real world graph realWorld = snap.LoadEdgeList(snap.PUNGraph, "CA-HepTh.txt", 0, 1) #deleting the self-edges from the graph snap.DelSelfEdges(realWorld) #calling the function wikiVotingNetwork() #Taking number of nodes in a graph from real world network n = realWorld.GetNodes() #Generating an Undirected Graph G = snap.TUNGraph.New() #Taking number of edges in a graph from user e = int(raw_input('Enter the number of Random Edges : ')) p = float( raw_input('Enter the Probability of Edges between Nodes from 0-1 : ')) #Generating Number of Nodes for i in range(n): #Adding Nodes into the graph G.AddNode(i) #calling the function erdosRenyi(G, p) #Printing the Clustering print 'Erdos Renyi Clustering Co-efficient: ', clustCoefficient(G) diam = snap.GetBfsFullDiam(G, 9877, False) #printing the diameter print 'Erdos Renyi Diameter: ', diam #plotting the graph snap.PlotOutDegDistr(G, "Erdos-Renyi", "Un-Directed graph - Out-Degree Distribution") snap.GetSccSzCnt(G, Component) for comp in Component: #printing number of strongly connected components with size print "Size: %d - Number of Connected Component in Erdos-Renyi: %d" % ( comp.GetVal1(), comp.GetVal2()) #printing fraction of nodes and edges print "Fraction of Nodes and Edges in Erdos Renyi: ", snap.GetMxSccSz(G) #Drawing a Erdos Renyi Graph snap.DrawGViz(G, snap.gvlDot, "erdosRenyi1.png", "Erdos Renyi") #calling the function smallWorldRandomNetwork(G, e) #printing the clustering coefficient print 'Small World Random Network Clustering Co-efficient: ', clustCoefficient( G) diam = snap.GetBfsFullDiam(G, 9877, False) #printing the diameter print 'Small World Random Network Diameter: ', diam snap.GetSccSzCnt(G, Component) for comp in Component: #printing number of strongly connected components with size print "Size: %d - Number of Connected Component in Small World: %d" % ( comp.GetVal1(), comp.GetVal2()) #fraction of nodes and edges in small world print "Fraction of Nodes 
and Edges in Small World: ", snap.GetMxSccSz(G) #plotting the graph snap.PlotOutDegDistr(G, "Small-World", "Un-Directed graph - Out-Degree Distribution") #drawinf the graph snap.DrawGViz(G, snap.gvlDot, "smallWorld1.png", "Small World Random Network") #calculating the clustering co-efficient print 'Real World Random Network Clustering Co-efficient: ', clustCoefficient( realWorld) diam = snap.GetBfsFullDiam(G, 9877, False) print 'Real World Random Network Diameter: ', diam snap.GetSccSzCnt(realWorld, Component) for comp in Component: #printing number of strongly connected components with size print "Size: %d - Number of Weekly Connected Component in Real World: %d" % ( comp.GetVal1(), comp.GetVal2()) #printing fraction of nodes and edges print "Fraction of Nodes and Edges in Small World: ", snap.GetMxSccSz( realWorld) #plotting the real world network graph snap.PlotOutDegDistr(realWorld, "real-World", "Un-Directed graph - Out-Degree Distribution") #Drawing Real WOrld Graph snap.DrawGViz(realWorld, snap.gvlDot, "realWorld.png", "Real World Random Network")
from graph import *
import snap

# `graph` and `flnme` are not defined here; presumably they come from the
# star import above.
# Each entry: (SNAP plotting routine, output file prefix, plot-title suffix).
# Order: degree distributions, connected components, clustering coefficient.
_PLOT_SPECS = (
    (snap.PlotInDegDistr, "InDegDist", " in-degree distribution"),
    (snap.PlotOutDegDistr, "OutDegDist", " out-degree distribution"),
    (snap.PlotSccDistr, "SccDist",
     " strongly connected components distribution"),
    (snap.PlotWccDistr, "WccDist",
     " weakly connected components distribution"),
    (snap.PlotClustCf, "ClustCoef", " clustering coefficient"),
)

for _plot_fn, _file_prefix, _title_suffix in _PLOT_SPECS:
    _plot_fn(graph, _file_prefix, flnme + _title_suffix)
import snap
import sys

# Usage: <script> <edge-list-file> <output-prefix>
# Fail with a readable message instead of an IndexError when an argument is
# missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s <edge-list-file> <output-prefix>" % sys.argv[0])

# Load a directed graph from columns 0 (source) and 1 (destination) of the
# edge list, then plot its out-degree distribution using the given prefix.
G5 = snap.LoadEdgeList(snap.PNGraph, sys.argv[1], 0, 1)
snap.PlotOutDegDistr(G5, sys.argv[2],
                     "Directed graph - out-degree Distribution")
def graphStructure(elistName, elistPath):
    """
    Calculate properties of the graph as given in the assignment

    Besides returning the results, this writes four SNAP plots whose file
    prefixes are built from `elistName`: degree distribution, shortest-path
    distribution, connected-component distribution and clustering
    coefficient.

    Args:
        elistName (str) -> Input elist name, used in the plot file prefixes
        elistPath (str or pathlib.Path) -> Input elist using which graph
            needs to be built

    Return:
        RESULTS (dict) -> Dictionary containing results for different
            subparts of the assignment
    """
    RESULTS = {}
    # The SWIG-wrapped loader takes a plain string, so coerce path-like
    # objects; this is a no-op for callers that already pass a str.
    subGraph = snap.LoadEdgeList(snap.PUNGraph, str(elistPath), 0, 1)

    # Part 1 (Size of the network)
    RESULTS['nodeCount'] = subGraph.GetNodes()
    RESULTS['edgeCount'] = subGraph.GetEdges()

    # Part 2 (Degree of nodes in the network)
    # One pass: count degree-7 nodes and track every node that ties the
    # running maximum, restarting the list whenever a larger degree appears.
    maxDegree = 0
    maxDegreeNodes = []
    degree7Count = 0
    for node in subGraph.Nodes():
        degree = node.GetDeg()
        if degree == 7:
            degree7Count += 1
        if degree > maxDegree:
            maxDegree = degree
            maxDegreeNodes = [node.GetId()]
        elif degree == maxDegree:
            maxDegreeNodes.append(node.GetId())

    plotFilename = f"deg_dist_{elistName}"
    # Since it is an undirected graph, in/out degree is unimportant
    snap.PlotOutDegDistr(subGraph, plotFilename)

    RESULTS['maxDegree'] = maxDegree
    RESULTS['maxDegreeNodes'] = ','.join(map(str, maxDegreeNodes))
    RESULTS['degree7Count'] = degree7Count

    # Part 3 (Paths in the network)
    # Full Diameter Calculation, keyed by the number of BFS start nodes
    fullDiameters = {
        10: snap.GetBfsFullDiam(subGraph, 10, False),
        100: snap.GetBfsFullDiam(subGraph, 100, False),
        1000: snap.GetBfsFullDiam(subGraph, 1000, False)
    }
    # Compute the statistics before adding the 'mean'/'variance' keys so
    # that only the three samples are aggregated.
    fullMean, fullVariance = meanVariance(fullDiameters.values())
    fullDiameters['mean'] = fullMean
    fullDiameters['variance'] = fullVariance
    RESULTS['fullDiameters'] = fullDiameters

    # Effective Diameter Calculation, same sample sizes
    effDiameters = {
        10: snap.GetBfsEffDiam(subGraph, 10, False),
        100: snap.GetBfsEffDiam(subGraph, 100, False),
        1000: snap.GetBfsEffDiam(subGraph, 1000, False),
    }
    effMean, effVariance = meanVariance(effDiameters.values())
    effDiameters['mean'] = effMean
    effDiameters['variance'] = effVariance
    RESULTS['effDiameters'] = effDiameters

    plotFilename = f"shortest_path_{elistName}"
    snap.PlotShortPathDistr(subGraph, plotFilename)

    # Part 4 (Components of the network)
    edgeBridges = snap.TIntPrV()
    articulationPoints = snap.TIntV()
    RESULTS['fractionLargestConnected'] = snap.GetMxSccSz(subGraph)
    # Both calls fill the vectors passed in; only their lengths are reported.
    snap.GetEdgeBridges(subGraph, edgeBridges)
    snap.GetArtPoints(subGraph, articulationPoints)
    RESULTS['edgeBridges'] = len(edgeBridges)
    RESULTS['articulationPoints'] = len(articulationPoints)

    plotFilename = f"connected_comp_{elistName}"
    snap.PlotSccDistr(subGraph, plotFilename)

    # Part 5 (Connectivity and clustering in the network)
    RESULTS['avgClusterCoefficient'] = snap.GetClustCf(subGraph, -1)
    RESULTS['triadCount'] = snap.GetTriadsAll(subGraph, -1)[0]

    # `Rnd` is not defined in this function; presumably a module-level SNAP
    # random generator -- confirm it exists at file scope.
    nodeX = subGraph.GetRndNId(Rnd)
    nodeY = subGraph.GetRndNId(Rnd)
    RESULTS['randomClusterCoefficient'] = (nodeX, snap.GetNodeClustCf(
        subGraph, nodeX))
    RESULTS['randomNodeTriads'] = (nodeY, snap.GetNodeTriads(subGraph, nodeY))
    RESULTS['edgesTriads'] = snap.GetTriadEdges(subGraph)

    plotFilename = f"clustering_coeff_{elistName}"
    snap.PlotClustCf(subGraph, plotFilename)

    return RESULTS
def _report_graph_stats(graph, plot_prefix, plot_title):
    """Print the standard statistics for `graph` and plot its degree distribution."""
    # Clustering coefficient
    print("Clustering Coeffecient: ", clustering_coffecient(graph))
    # Diameter: element [2] of the GetBfsEffDiamAll result (presumably the
    # full diameter -- confirm against the snap.py docs)
    diameter_info = snap.GetBfsEffDiamAll(graph, 10, False)
    print(diameter_info[2])
    # Largest Strongly Connected Component
    print("Largest Strongly Connected Component - Maximum size:",
          snap.GetMxSccSz(graph))
    # Number of edges in the largest strongly connected component
    largest_component_edges = snap.GetMxScc(graph).GetEdges()
    print(largest_component_edges)
    # Plot of Degree Distribution
    snap.PlotOutDegDistr(graph, plot_prefix, plot_title)


def main():
    """Report statistics for an Erdos-Renyi, a small-world and the CA-HepTh graph.

    Reads the node count, the edge probability and the number of random
    node pairs from standard input, builds the two synthetic graphs with
    the file's helper functions, loads "CA-HepTh.txt" from the working
    directory, and prints the same set of statistics for each graph.
    """
    # Number of nodes
    n = int(raw_input("Please enter the number of nodes"))
    # Probability of an edge between nodes
    p = float(
        raw_input(
            "Please enter the value of probability of an edge between nodes"))
    # Random Input of x pairs of nodes
    x = int(raw_input("Please enter the number of random, x pairs of nodes:"))

    # Empty graph with n nodes, then edges from the personal Erdos Renyi model
    ERM = Empty_graph(n)
    Erdos_Renyi(ERM, p)
    # The largest-component size is now taken from ERM; the original read
    # Small_world here, before it was assigned, which raised
    # UnboundLocalError.
    _report_graph_stats(ERM, "ERMGraph", "ERM Degree Distribution")

    # Small World Network
    Small_world = Empty_graph(n)
    first_edges(Small_world)
    second_edges(Small_world)
    random_edges(Small_world, x)
    _report_graph_stats(Small_world, "SmallWorldGraph",
                        "Small World Degree Distribution")

    # Collaboration Network
    Collaboration_Network = snap.LoadEdgeList(snap.PUNGraph, "CA-HepTh.txt", 0,
                                              1)
    snap.DelSelfEdges(Collaboration_Network)
    snap.PrintInfo(Collaboration_Network, "Graph Statistics", "info.txt",
                   False)
    _report_graph_stats(Collaboration_Network, "CollaborationNetworkGraph",
                        "Collaboration Network Degree Distribution")
# Edge count of the graph held in `fbsgel` (built earlier in the script).
fben = fbsgel.GetEdges()
print("Number of edges:", fben)

# Q2: degree of nodes
# a) number of nodes whose degree is exactly 7
print("Number of nodes with degree=7:", snap.CntDegNodes(fbsgel, 7))

# b) ids of all nodes that share the maximum degree
# Look up one max-degree node to learn the maximum degree, then scan every
# node for ties.
max_deg_fb_id = snap.GetMxDegNId(fbsgel)
NI = fbsgel.GetNI(max_deg_fb_id)
max_deg_fb = NI.GetDeg()
for NI in fbsgel.Nodes():
    if (NI.GetDeg() == max_deg_fb):
        # MaxDegVfb is created outside this excerpt; presumably an empty
        # list at this point -- verify against the earlier code.
        MaxDegVfb.append(NI.GetId())
MaxDegNodeString = ','.join(map(str, MaxDegVfb))
print("Node id(s) with highest degree:", MaxDegNodeString)

# c) degree distribution plot; the same string is used as both the file
# prefix and the plot description
snap.PlotOutDegDistr(fbsgel, "deg_dist_" + str(subgraph_name), "deg_dist_" + str(subgraph_name))

# Q3: paths in the network
# a) approximate full diameter using 10, 100 and 1000 BFS start nodes
i = 10
average = 0.0
variance = 0.0
while (i <= 1000):
    diam = snap.GetBfsFullDiam(fbsgel, i, False)
    print("Approximate full diameter by sampling", i, "nodes:", round(diam, 4))
    i *= 10
    # Accumulate the sum and the sum of squares of the three samples.
    average += diam
    variance += (diam * diam)
# Three samples were taken; variance is E[x^2] - (E[x])^2.
average /= 3
variance = (variance / 3) - average * average
print("Approximate full diameter(mean and variance): %0.4f,%0.4f" % (average, variance))
import snap


def _load_undirected(edge_list_path):
    """Load an undirected graph from columns 0 and 1 of an edge-list file."""
    return snap.LoadEdgeList(snap.PUNGraph, edge_list_path, 0, 1)


# Load all four graphs first, then plot them in the same order.
Graph = _load_undirected('G1.edgelist')
ERGraph = _load_undirected('G2.edgelist')
WSGraph = _load_undirected('G3.edgelist')
BAGraph = _load_undirected('G4.edgelist')

# (graph, output file prefix) pairs; every plot shares the same description.
for _graph, _prefix in (
        (Graph, "G1"),
        (ERGraph, "G2"),
        (WSGraph, "G3"),
        (BAGraph, "G4"),
):
    snap.PlotOutDegDistr(_graph, _prefix, "degree Distribution")