#4.Components of the network fraction = snap.GetMxSccSz(Graph1) print("Fraction of nodes in largest connected component: %0.4f" % fraction) V_edges = snap.TIntPrV() snap.GetEdgeBridges(Graph1, V_edges) edge_bridges = V_edges.Len() print("Number of edge bridges: ", edge_bridges) Art_points = snap.TIntV() snap.GetArtPoints(Graph1, Art_points) art = Art_points.Len() print("Number of articulation points: ", art) str2 = "connected_comp_" + file_name snap.PlotSccDistr(Graph1, str2, "Distribution of sizes of connected components") #5.Connectivity and clustering in the network avg_cc = snap.GetClustCf(Graph1, -1) print("Average clustering coefficient: %0.4f" % avg_cc) triads = snap.GetTriads(Graph1, -1) print("Number of triads: ", triads) random1 = Graph1.GetRndNId(Rnd) node_cc = snap.GetNodeClustCf(Graph1, random1) print("Clustering coefficient of random node %d: %0.4f" % (random1, node_cc)) random2 = Graph1.GetRndNId(Rnd) node_triads = snap.GetNodeTriads(Graph1, random2) print("Number of triads random node %d participates: %d" % (random2, node_triads))
G=snap.TUNGraph.New(N, M) for i in xrange(N): G.AddNode(i) for u, b in zip(data.user_id, data.business_id): G.AddEdge(u_to_id[u], b_to_id[b]) assert G.GetNodes() == N assert G.GetEdges() == M return G Gtrain=generateGraph(train) Gval=generateGraph(trueValidation) Gtest=generateGraph(trueTest) snap.PlotSccDistr(Gtest, "destribution_gtest", "G_{test}") snap.PlotSccDistr(Gtest, "destribution_gtrain", "G_{train}") snap.PlotSccDistr(Gtest, "destribution_gval", "G_{val}") snap.PlotOutDegDistr(Gtest, "degree_gtest", "G_{test}", False, True) snap.PlotOutDegDistr(Gtrain, "degree_gtrain", "G_{train}", False, True) snap.PlotOutDegDistr(Gval, "degree_gval", "G_{val}", False, True)
def graphStructure(elistName, elistPath):
    """
    Calculate properties of the graph as given in the assignment

    Side effects: writes snap plot files whose names embed ``elistName``
    (degree, shortest-path, connected-component and clustering plots) and
    draws two random node ids from the module-level ``Rnd`` generator.

    Args:
        elistName (str) -> Input elist name
        elistPath (str or pathlib.Path) -> Input elist using which graph
            needs to be built
    Return:
        RESULTS (dict) -> Dictionary containing results for different
            subparts of the assignment
    """
    RESULTS = {}
    # The loader is handed a plain string so that pathlib.Path inputs (as the
    # Args section allows) are accepted as well.
    subGraph = snap.LoadEdgeList(snap.PUNGraph, str(elistPath), 0, 1)

    # Part 1 (Size of the network)
    RESULTS['nodeCount'] = subGraph.GetNodes()
    RESULTS['edgeCount'] = subGraph.GetEdges()

    # Part 2 (Degree of nodes in the network)
    # Read every (id, degree) once and derive all degree figures from it.
    degrees = [(node.GetId(), node.GetDeg()) for node in subGraph.Nodes()]
    maxDegree = max((deg for _, deg in degrees), default=0)
    maxDegreeNodes = [nodeId for nodeId, deg in degrees if deg == maxDegree]
    degree7Count = sum(1 for _, deg in degrees if deg == 7)

    plotFilename = f"deg_dist_{elistName}"
    # Since it is an undirected graph, in/out degree is unimportant
    snap.PlotOutDegDistr(subGraph, plotFilename)

    RESULTS['maxDegree'] = maxDegree
    RESULTS['maxDegreeNodes'] = ','.join(map(str, maxDegreeNodes))
    RESULTS['degree7Count'] = degree7Count

    # Part 3 (Paths in the network)
    sampleSizes = (10, 100, 1000)

    # Full Diameter Calculation
    fullDiameters = {
        size: snap.GetBfsFullDiam(subGraph, size, False)
        for size in sampleSizes
    }
    # Statistics are taken before the 'mean'/'variance' keys are added, so
    # they only cover the three sampled values.
    fullMean, fullVariance = meanVariance(fullDiameters.values())
    fullDiameters['mean'] = fullMean
    fullDiameters['variance'] = fullVariance
    RESULTS['fullDiameters'] = fullDiameters

    # Effective Diameter Calculation
    effDiameters = {
        size: snap.GetBfsEffDiam(subGraph, size, False)
        for size in sampleSizes
    }
    effMean, effVariance = meanVariance(effDiameters.values())
    effDiameters['mean'] = effMean
    effDiameters['variance'] = effVariance
    RESULTS['effDiameters'] = effDiameters

    plotFilename = f"shortest_path_{elistName}"
    snap.PlotShortPathDistr(subGraph, plotFilename)

    # Part 4 (Components of the network)
    edgeBridges = snap.TIntPrV()
    articulationPoints = snap.TIntV()
    RESULTS['fractionLargestConnected'] = snap.GetMxSccSz(subGraph)
    snap.GetEdgeBridges(subGraph, edgeBridges)
    snap.GetArtPoints(subGraph, articulationPoints)
    RESULTS['edgeBridges'] = len(edgeBridges)
    RESULTS['articulationPoints'] = len(articulationPoints)

    plotFilename = f"connected_comp_{elistName}"
    snap.PlotSccDistr(subGraph, plotFilename)

    # Part 5 (Connectivity and clustering in the network)
    RESULTS['avgClusterCoefficient'] = snap.GetClustCf(subGraph, -1)
    RESULTS['triadCount'] = snap.GetTriadsAll(subGraph, -1)[0]

    # Two independent draws: the clustering and triad figures may describe
    # different nodes, so each result carries its node id.
    nodeX = subGraph.GetRndNId(Rnd)
    nodeY = subGraph.GetRndNId(Rnd)
    RESULTS['randomClusterCoefficient'] = (
        nodeX, snap.GetNodeClustCf(subGraph, nodeX))
    RESULTS['randomNodeTriads'] = (nodeY, snap.GetNodeTriads(subGraph, nodeY))
    RESULTS['edgesTriads'] = snap.GetTriadEdges(subGraph)

    plotFilename = f"clustering_coeff_{elistName}"
    snap.PlotClustCf(subGraph, plotFilename)

    return RESULTS
round(get_mean(effective_diameter), 4), ',', round(get_variance(effective_diameter), 4), sep="") snap.PlotShortPathDistr(graph, "temp", "Undirected graph - shortest path") os.system("mv diam.temp.png plots/shortest_path_" + subgraph_name + ".png") os.system("rm diam.*") print("Fraction of nodes in largest connected component:", round(snap.GetMxSccSz(graph), 4)) print("Number of edge bridges:", get_bridges(graph).Len()) print("Number of articulation points:", get_articulation_points(graph).Len()) snap.PlotSccDistr(graph, "temp", "Undirected graph - scc distribution") os.system("mv scc.temp.png plots/connected_comp_" + subgraph_name + ".png") os.system("rm scc.*") print("Average clustering coefficient:", round(snap.GetClustCf(graph), 4)) print("Number of triads:", snap.GetTriads(graph)) random_node = graph.GetRndNId() print("Clustering coefficient of random node", random_node, ":", round(get_each_nodes_ClusteringCofficient(graph)[random_node], 4)) random_node = graph.GetRndNId() print("Number of triads random node", random_node, "participates:", snap.GetNodeTriads(graph, random_node)) print("Number of edges that participate in at least one triad:", snap.GetTriadEdges(graph)) snap.PlotClustCf(graph, "temp",
# Maps the prefix snap puts on each plot file to the prefix the report expects.
_PLOT_PREFIX_BY_KIND = {
    "ccf": "clustering_coeff_",
    "outDeg": "deg_dist_",
    "diam": "shortest_path_",
    "scc": "connected_comp_",
}


def _tidy_plots_dir(plots_dir):
    """Drop non-PNG by-products and rename '<kind>.<name>.png' plot files."""
    for entry in os.listdir(plots_dir):
        path = os.path.join(plots_dir, entry)
        if os.path.isfile(path) and not entry.endswith(".png"):
            os.remove(path)

    for entry in os.listdir(plots_dir):
        kind, _, remainder = entry.partition(".")
        prefix = _PLOT_PREFIX_BY_KIND.get(kind)
        # Skip files that are already renamed, have no '<name>.png' part, or
        # carry a plot kind we do not know, instead of guessing a prefix.
        if prefix is None or not remainder.endswith(".png"):
            continue
        source = os.path.join(plots_dir, entry)
        if os.path.isfile(source):
            os.replace(source, os.path.join(plots_dir, prefix + remainder))


def _report_diameters(kind, sample_sizes, diameters):
    """Print one line per sample size, then the mean and sample variance."""
    for size, value in zip(sample_sizes, diameters):
        printWithOutNewLine(
            "Approximate %s diameter by sampling %d nodes:" % (kind, size),
            value)
    print("Approximate %s diameter (mean and variance):" % kind, end=" ")
    print(round(statistics.mean(diameters), 4),
          round(statistics.variance(diameters), 4),
          sep=',')


def main():
    """Print the structural report for one subgraph and generate its plots.

    The edge list named by ``sys.argv[1]`` is loaded from ``./subgraphs``.
    Size, degree, diameter, component and clustering figures are printed, and
    the four distribution plots are written to ``./plots`` under report
    friendly names.  The working directory is changed temporarily while
    loading and plotting and is always restored, even if a step fails.
    """
    parentDir = os.getcwd()

    os.chdir(os.path.join(parentDir, "subgraphs"))
    try:
        sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    finally:
        os.chdir(parentDir)
    subGraphName = sys.argv[1].split(".")[0]

    #### 1 ########
    printWithOutNewLine("Number of nodes:", sub_graph.GetNodes())
    printWithOutNewLine("Number of edges:",
                        snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))

    # snap returns one max-degree node; look its degree up, then list every
    # node that ties with it.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    degrees = [(pair.GetVal1(), pair.GetVal2()) for pair in nodeDegPairs]
    maxDegVal = next(
        (deg for nodeId, deg in degrees if nodeId == rndMaxDegNId), 0)
    maxDegNodes = [nodeId for nodeId, deg in degrees if deg == maxDegVal]
    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sampleSizes = (10, 100, 1000)
    sampledFullDiam = [
        snap.GetBfsFullDiam(sub_graph, size, False) for size in sampleSizes
    ]
    _report_diameters("full", sampleSizes, sampledFullDiam)

    # Effective diameters are rounded first, so the statistics below are
    # computed on the same values that are printed.
    sampledEffDiam = [
        round(snap.GetBfsEffDiam(sub_graph, size, False), 4)
        for size in sampleSizes
    ]
    _report_diameters("effective", sampleSizes, sampledEffDiam)

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))
    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))
    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))

    # One random node is used for both per-node figures.
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)
    print("Clustering coefficient of random node {}: {}".format(
        randomNodeId, round(nodeIdCcfMap[randomNodeId], 4)))
    print("Number of triads random node {} participates: {}".format(
        randomNodeId, snap.GetNodeTriads(sub_graph, randomNodeId)))
    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    plotsDir = os.path.join(parentDir, "plots")
    if not os.path.isdir(plotsDir):
        os.makedirs(plotsDir)
    # The plot routines write into the working directory, hence the chdir.
    os.chdir(plotsDir)
    try:
        snap.PlotOutDegDistr(sub_graph, subGraphName,
                             subGraphName + " Subgraph Degree Distribution")
        snap.PlotShortPathDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Shortest Path Lengths Distribution")
        snap.PlotSccDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Connected Components Size Distribution")
        snap.PlotClustCf(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Clustering Coefficient Distribution")
        _tidy_plots_dir(plotsDir)
    finally:
        os.chdir(parentDir)
from graph import *
import snap

# Each row: (snap plotting routine, output file tag, caption suffix).
# `graph` and `flnme` are supplied by the star import above.
_DISTRIBUTION_PLOTS = (
    # plot deg dist
    (snap.PlotInDegDistr, "InDegDist", " in-degree distribution"),
    (snap.PlotOutDegDistr, "OutDegDist", " out-degree distribution"),
    # plot connected components dist
    (snap.PlotSccDistr, "SccDist",
     " strongly connected components distribution"),
    (snap.PlotWccDistr, "WccDist",
     " weakly connected components distribution"),
    # plot cluster coefficient
    (snap.PlotClustCf, "ClustCoef", " clustering coefficient"),
)

for _plot, _tag, _caption in _DISTRIBUTION_PLOTS:
    _plot(graph, _tag, flnme + _caption)
def Scc():
    """Plot the component-size distribution of the module-level ``Graph``.

    Returns:
        Whatever ``snap.PlotSccDistr`` returns for this call.
    """
    file_tag = "ScaleFreeScc"
    caption = "Undirected graph - scc distribution"
    return snap.PlotSccDistr(Graph, file_tag, caption)
print "Approx. effective diameter in " + input_file + " with sampling ", i, " nodes: ", round( diameter[index], 3) index = index + 1 mean = float(sum(diameter) / 3.0) variance = float((pow((diameter[0] - mean), 2) + pow( (diameter[1] - mean), 2) + pow((diameter[2] - mean), 2)) / 2.0) print "Approx. effective diameter in " + input_file + " (mean and variance): ", round( mean, 3), ", ", round(variance, 3) snap.PlotShortPathDistr(Graph1, "shortest_path_plot_" + input_file, "Undirected graph - shortest path", 1000) print "Shortest path distribution of " + input_file + " is in: diam.shortest_path_plot_" + input_file + ".png" largest_component = snap.TCnComV() snap.GetSccs(Graph1, largest_component) largest = 0.0 for item in largest_component: if largest < item.Len(): largest = item.Len() print "" print "Fraction of nodes in largest connected component in " + input_file + ": ", float( largest) / float(final_nodes) snap.PlotSccDistr(Graph1, "conn_components_plot_" + input_file, "Undirected graph - Connected components distribution") print "Component size distribution of " + input_file + " is in: scc.conn_components_plot_" + input_file + ".png"
def _log_step(message):
    """Print ``message`` prefixed with the current timestamp."""
    print("{0} {1}".format(datetime.datetime.now(), message))


def _save_snap_table(table, path):
    """Serialize a snap hashtable to ``path`` and flush the stream."""
    fout = snap.TFOut(path)
    table.Save(fout)
    # Flush explicitly so the file is complete before anything reads it back.
    fout.Flush()


def _load_snap_table(table, path):
    """Fill a snap hashtable from the file at ``path``."""
    fin = snap.TFIn(path)
    table.Load(fin)


def _ranked_top_nodes(hashtable):
    """Return the top nodes of ``hashtable`` sorted by ascending score."""
    top_n = get_top_nodes_from_hashtable(hashtable)
    top_n.sort(key=itemgetter(1))
    return top_n


def _label_best_first(top_n, id_pkg_dict):
    """Turn ascending (node id, score) pairs into (pkg, score), best first."""
    return [(id_pkg_dict[pair[0]], pair[1]) for pair in reversed(top_n)]


def _snap_plot_incomplete(prefix, output):
    """Return True unless the .plt, .tab and .png files of a plot all exist."""
    return not all(
        os.path.isfile("{0}.{1}.{2}".format(prefix, output, ext))
        for ext in ("plt", "tab", "png"))


def compute_graph_statistics(graph_path, overwrite, compute_betweenness=False):
    """Compute statistics and plots for a saved graph, caching the results.

    The graph is loaded with ``snap.TNEANet.Load`` and every node is expected
    to carry a "pkg" string attribute.  Scalar and top-N results are stored in
    ``<name>_statistics.json`` next to the graph.  Intermediate score tables
    and plots are written to the same directory.  A step is skipped when its
    outputs already exist, unless ``overwrite`` is set.

    Note: the working directory is changed to the graph's directory and is
    not restored.

    Args:
        graph_path: Path of the saved graph; a ".graph" suffix is stripped to
            build the output names.
        overwrite: When true, recompute and rewrite every output.
        compute_betweenness: When true, also compute the betweenness ranking
            and plot.
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)

    # rebuild the id => pkg dictionary
    id_pkg_dict = {}
    for node in graph.Nodes():
        node_id = node.GetId()
        id_pkg_dict[node_id] = graph.GetStrAttrDatN(node_id, "pkg")

    directory = os.path.dirname(graph_abs_path)
    json_path = os.path.join(directory, graph_name + "_statistics.json")
    if os.path.isfile(json_path):
        with open(json_path, "r") as f:
            statistics = json.load(f, object_pairs_hook=OrderedDict)
    else:
        statistics = OrderedDict()

    # snap.py doesn't support absolute paths for some operations. Let's cd to
    # the directory
    os.chdir(directory)

    # general statistics
    output = os.path.join(directory, graph_name + "_main_statistics.txt")
    if not os.path.isfile(output) or overwrite:
        _log_step("Computing general statistics")
        snap.PrintInfo(graph, "Play Store Graph -- main statistics", output,
                       False)

    # info about the nodes with the max in degree
    if "max_in_degree" not in statistics or overwrite:
        _log_step("Computing max indegree")
        max_in_deg_id = snap.GetMxInDegNId(graph)
        max_in_deg = graph.GetNI(max_in_deg_id).GetInDeg()
        max_in_deg_pkg = graph.GetStrAttrDatN(max_in_deg_id, "pkg")
        statistics["max_in_degree"] = max_in_deg
        statistics["max_in_degree_id"] = max_in_deg_id
        statistics["max_in_degree_pkg"] = max_in_deg_pkg

    # info about the nodes with the max out degree
    if "max_out_degree" not in statistics or overwrite:
        _log_step("Computing max outdegree")
        max_out_deg_id = snap.GetMxOutDegNId(graph)
        max_out_deg = graph.GetNI(max_out_deg_id).GetOutDeg()
        max_out_deg_pkg = graph.GetStrAttrDatN(max_out_deg_id, "pkg")
        statistics["max_out_degree"] = max_out_deg
        statistics["max_out_degree_id"] = max_out_deg_id
        statistics["max_out_degree_pkg"] = max_out_deg_pkg

    # pagerank statistics
    output = graph_name + "_topNpagerank.eps"
    if (not os.path.isfile(output) or "top_n_pagerank" not in statistics
            or overwrite):
        _log_step("Computing top 20 nodes with highest pagerank")
        data_file = graph_name + "_pageranks"
        prank_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file) or overwrite:
            # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
            snap.GetPageRank(graph, prank_hashtable, 0.85)
            _save_snap_table(prank_hashtable, data_file)
        else:
            _load_snap_table(prank_hashtable, data_file)

        top_n = _ranked_top_nodes(prank_hashtable)
        if "top_n_pagerank" not in statistics or overwrite:
            statistics["top_n_pagerank"] = _label_best_first(
                top_n, id_pkg_dict)

        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 pagerank nodes
            top_ids = [x[0] for x in top_n]
            subgraph = get_subgraph(graph, top_ids)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(prank_hashtable, top_ids)
            plot_subgraph_colored(subgraph, labels_dict, values, "PageRank",
                                  "Play Store Graph - top 20 PageRank nodes",
                                  output, "autumn_r")

    # betweenness statistics
    output = graph_name + "_topNbetweenness.eps"
    # The cache check uses "top_n_betweenness", the key stored below.  The
    # old check looked for "betweenness", which is never written, so the
    # ranking was recomputed on every run.
    if compute_betweenness and (not os.path.isfile(output)
                                or "top_n_betweenness" not in statistics
                                or overwrite):
        _log_step("Computing top 20 nodes with highest betweenness")
        data_file1 = graph_name + "_node_betweenness"
        data_file2 = graph_name + "_edge_betweenness"
        node_betwenness_hashtable = snap.TIntFltH()
        edge_betwenness_hashtable = snap.TIntPrFltH()
        if (not os.path.isfile(data_file1) or not os.path.isfile(data_file2)
                or overwrite):
            snap.GetBetweennessCentr(graph, node_betwenness_hashtable,
                                     edge_betwenness_hashtable, 0.85, True)
            _save_snap_table(node_betwenness_hashtable, data_file1)
            _save_snap_table(edge_betwenness_hashtable, data_file2)
        else:
            _load_snap_table(node_betwenness_hashtable, data_file1)
            # unused, as now
            _load_snap_table(edge_betwenness_hashtable, data_file2)

        top_n = _ranked_top_nodes(node_betwenness_hashtable)
        if "top_n_betweenness" not in statistics or overwrite:
            statistics["top_n_betweenness"] = _label_best_first(
                top_n, id_pkg_dict)

        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 betweenness nodes
            top_ids = [x[0] for x in top_n]
            subgraph = get_subgraph(graph, top_ids)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(node_betwenness_hashtable,
                                            top_ids)
            plot_subgraph_colored(
                subgraph, labels_dict, values, "Betweenness",
                "Play Store Graph - top 20 Betweenness nodes", output)

    # HITS statistics
    output_hub = graph_name + "_topNhitshubs.eps"
    output_auth = graph_name + "_topNhitsauth.eps"
    hits_plots_missing = (not os.path.isfile(output_hub)
                          or not os.path.isfile(output_auth))
    if (hits_plots_missing or "top_n_hits_hubs" not in statistics
            or "top_n_hits_authorities" not in statistics or overwrite):
        _log_step("Computing top 20 HITS hubs and auths")
        data_file1 = graph_name + "_hits_hubs"
        data_file2 = graph_name + "_hits_auth"
        hubs_hashtable = snap.TIntFltH()
        auth_hashtable = snap.TIntFltH()
        if (not os.path.isfile(data_file1) or not os.path.isfile(data_file2)
                or overwrite):
            # MaxIter = 20
            snap.GetHits(graph, hubs_hashtable, auth_hashtable, 20)
            _save_snap_table(hubs_hashtable, data_file1)
            _save_snap_table(auth_hashtable, data_file2)
        else:
            _load_snap_table(hubs_hashtable, data_file1)
            _load_snap_table(auth_hashtable, data_file2)

        top_n_hubs = _ranked_top_nodes(hubs_hashtable)
        if "top_n_hits_hubs" not in statistics or overwrite:
            statistics["top_n_hits_hubs"] = _label_best_first(
                top_n_hubs, id_pkg_dict)

        top_n_auth = _ranked_top_nodes(auth_hashtable)
        if "top_n_hits_authorities" not in statistics or overwrite:
            statistics["top_n_hits_authorities"] = _label_best_first(
                top_n_auth, id_pkg_dict)

        if hits_plots_missing or overwrite:
            nodes_subset = set()
            for pair in top_n_hubs:
                nodes_subset.add(pair[0])
            for pair in top_n_auth:
                nodes_subset.add(pair[0])

            # let's build a subgraph induced on the top N HITS auths and hubs
            # nodes
            subgraph = get_subgraph(graph, nodes_subset)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(hubs_hashtable, nodes_subset)
            values2 = snap_hashtable_to_dict(auth_hashtable, nodes_subset)
            plot_subgraph_colored(
                subgraph, labels_dict, values, "HITS - Hub Index",
                "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities",
                output_hub, "bwr")
            plot_subgraph_colored(
                subgraph, labels_dict, values2, "HITS - Authority Index",
                "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities",
                output_auth, "bwr_r")

    # Distribution plots.  Each row: output-name suffix, file prefix used for
    # the generated plot files, log label, snap routine name, plot title and
    # any extra positional arguments.  The routine is looked up lazily so a
    # missing one only fails when its plot is actually needed.
    distribution_plots = (
        ("_indegree", "inDeg", "indegree", "PlotInDegDistr",
         "Play Store Graph - in-degree Distribution", ()),
        ("_outdegree", "outDeg", "outdegree", "PlotOutDegDistr",
         "Play Store Graph - out-degree Distribution", ()),
        ("_scc", "scc", "scc", "PlotSccDistr",
         "Play Store Graph - strongly connected components distribution",
         ()),
        ("_wcc", "wcc", "wcc", "PlotWccDistr",
         "Play Store Graph - weakly connected components distribution", ()),
        ("_cf", "ccf", "cf", "PlotClustCf",
         "Play Store Graph - clustering coefficient distribution", ()),
        ("_hops", "hop", "shortest path", "PlotHops",
         "Play Store Graph - Cumulative Shortest Paths (hops) distribution",
         (True,)),
        ("_kcore_edges", "coreEdges", "k-core edges", "PlotKCoreEdges",
         "Play Store Graph - K-Core edges distribution", ()),
        ("_kcore_nodes", "coreNodes", "k-core nodes", "PlotKCoreNodes",
         "Play Store Graph - K-Core nodes distribution", ()),
    )
    for suffix, prefix, label, plot_name, title, extra in distribution_plots:
        output = graph_name + suffix
        if _snap_plot_incomplete(prefix, output) or overwrite:
            _log_step("Computing {0} distribution".format(label))
            getattr(snap, plot_name)(graph, output, title, *extra)

    with open(json_path, 'w') as outfile:
        json.dump(statistics, outfile, indent=2)
ArtNIdV = snap.TIntV() snap.GetArtPoints(p2p_gnutella04_subgraph, ArtNIdV) art_point = 0 for NI in ArtNIdV: art_point = art_point + 1 print "Number of articulation points in p2p-Gnutella04-subgraph :" + str( art_point) # Task 1.2.4.4 if (sub_graph_name == "soc-Epinions1-subgraph"): #Plotting the distribution of sizes of connected components snap.PlotSccDistr(soc_epinions1_subgraph, "soc-Epinions1-subgraph", "Undirected Scc Distribution") print "Component size Distribution of soc-Epinions1-subgraph is in :" + 'scc.soc-Epinions1-subgraph.png' if (sub_graph_name == "cit-HepPh-subgraph"): #Plotting the distribution of sizes of connected components snap.PlotSccDistr(cit_heph_subgraph, "cit-HepPh-subgraph", "Undirected Scc Distribution") print " Component size Distribution of cit-HepPh-subgraph is in :" + 'scc.cit-HepPh-subgraph.png' if (sub_graph_name == "email-Enron-subgraph"): #Plotting the distribution of sizes of connected component snap.PlotSccDistr(email_enron_subgraph, "email-Enron-subgraph", "Undirected Scc Distribution") print "Component size Distribution of email-Enron-subgraph is in :" + 'scc.email-Enron-subgraph.png' if (sub_graph_name == "p2p-Gnutella04-subgraph"): #Plotting the distribution of sizes of connected components
def connectivity(self):
    """Plot the component-size distribution of ``self.graph`` and show it.

    The plot is produced by snap and then read back from
    ``scc.Connectivity.png`` in the working directory for display.
    """
    plot_tag = "Connectivity"
    snap.PlotSccDistr(self.graph, plot_tag, "Connectivity")

    rendered = mpimg.imread("scc.Connectivity.png")
    plt.figure()
    plt.imshow(rendered)
    plt.show()
# Weakly connected components: (size, count) pairs, printed and scatter-plotted.
# The single-argument print(...) form behaves the same on Python 2 and 3.
ComponentDist = snap.TIntPrV()
snap.GetWccSzCnt(G, ComponentDist)
size = []
counts = []
print("WCC counts")
for comp in ComponentDist:
    size.append(comp.GetVal1())
    counts.append(comp.GetVal2())
    print("Size: %d Count: %d" % (comp.GetVal1(), comp.GetVal2()))
plt.clf()
plt.figure()
plt.plot(size, counts, '.')

# Strongly connected components.  This section must query the SCC routine;
# it previously called GetWccSzCnt again and so repeated the WCC numbers
# under the "SCC counts" heading.
ComponentDist2 = snap.TIntPrV()
snap.GetSccSzCnt(G, ComponentDist2)
print("SCC counts")
for comp in ComponentDist2:
    print("Size: %d Count: %d" % (comp.GetVal1(), comp.GetVal2()))

# Finish and save the WCC scatter plot started above.
plt.title("Youtube Video WCC Size Distribution")
plt.xlabel("WCC Size")
plt.ylabel("Number of WCC of given size")
plt.savefig("wcc-distr3.pdf")

snap.PlotWccDistr(G, "wcc-distr3",
                  "Directed Related Video Graph - WCC distribution")
print("getting SCC size distribution...")
snap.PlotSccDistr(G, "scc-distr3",
                  "Directed Related Video Graph - SCC distribution")
# ArtNIdV is created by code above this fragment and filled here.
snap.GetArtPoints(G, ArtNIdV)
print("Number of articulation points:", len(ArtNIdV))

_avg_clustering = snap.GetClustCf(G, -1)
print("Average clustering coefficient: %.4f" % _avg_clustering)
_triad_total = snap.GetTriads(G, -1)
print("Number of triads:", _triad_total)

# Two separate draws from `Rnd`: one node per per-node figure.
Ran_n = G.GetRndNId(Rnd)
_node_clustering = snap.GetNodeClustCf(G, Ran_n)
print("Clustering coefficient of random node %d: %.4f" %
      (Ran_n, _node_clustering))
Ran_n = G.GetRndNId(Rnd)
_node_triads = snap.GetNodeTriads(G, Ran_n)
print("Number of triads random node %d participates: %d" %
      (Ran_n, _node_triads))

_triad_edges = snap.GetTriadEdges(G)
print("Number of edges that participate in at least one triad:",
      _triad_edges)

# Each row: snap routine, tag prepended to the input name, plot title, prefix
# of the file snap produces, and prefix of the final file inside "plots".
_PLOT_JOBS = (
    (snap.PlotInDegDistr, "D_", "Degree Distribution",
     "inDeg.", "deg_dist_"),
    (snap.PlotShortPathDistr, "S_", "Shortest path Distribution",
     "diam.", "shortest_path_"),
    (snap.PlotSccDistr, "C_", "Component Size Distribution",
     "scc.", "connected_comp_"),
    (snap.PlotClustCf, "C_", "Clustering Coefficient Distribution",
     "ccf.", "clustering_coeff_"),
)
for _plot, _tag, _title, _made_prefix, _final_prefix in _PLOT_JOBS:
    _plot(G, _tag + sys.argv[1], _title)
    MoveFile(
        os.path.join(dirname, _made_prefix + _tag + sys.argv[1] + ".png"),
        os.path.join(dirname, "plots",
                     _final_prefix + sys.argv[1] + ".png"))
# Size of the largest connected component, reported as a share of all nodes.
MxConCompSize = sn.GetMxScc(graph).GetNodes()
_largest_share = MxConCompSize / graph.GetNodes()
print("Fraction of nodes in largest connected component: {:0.4f}".format(
    _largest_share))

## Edge Bridges
edgeBridge = sn.TIntPrV()
sn.GetEdgeBridges(graph, edgeBridge)
_bridge_total = len(edgeBridge)
print("Number of edge bridges: {}".format(_bridge_total))

## Articulation Points
artPoints = sn.TIntV()
sn.GetArtPoints(graph, artPoints)
_articulation_total = len(artPoints)
print("Number of articulation points: {}".format(_articulation_total))

## Connected Components Distribution
sn.PlotSccDistr(graph, name, "Connected Component Distribution")
plotRemove("scc", "connected_comp", name)

#Question 5
## Clustering Coefficient
_avg_clustering = sn.GetClustCf(graph)
print("Average clustering coefficient: {:0.4f}".format(_avg_clustering))

## Triads
_triad_total = sn.GetTriads(graph)
print("Number of triads: {}".format(_triad_total))

## Random Clustering Coefficient
rndNode = graph.GetRndNId()
_node_clustering = sn.GetNodeClustCf(graph, rndNode)
print("Clustering coefficient of random node {}: {:0.4f}".format(
    rndNode, _node_clustering))
em = mean(effData)
ev = variance(effData)

# Effective diameters and their statistics are fractional values, so they are
# printed with %.4f; %d silently truncated them to whole numbers.
# The single-argument print(...) form behaves the same on Python 2 and 3.
print("Approx. effective diameter in %s with sampling 10 nodes: %.4f" %
      (file, effDiam10))
print("Approx. effective diameter in %s with sampling 100 nodes: %.4f" %
      (file, effDiam100))
print("Approx. effective diameter in %s with sampling 1000 nodes: %.4f" %
      (file, effDiam1000))
print("Approx. effective diameter in %s (mean and variance): %.4f, %.4f\n" %
      (file, em, ev))

# c) Plot of the distribution of the shortest path
plotFN1 = file + ".diam.short-path-plot.png"
snap.PlotShortPathDistr(UGraph, plotFN1,
                        "Undirected graph - Shortest path for file " + file)
# plotFN1 is only the name prefix passed to snap; the image itself is written
# as "diam.<prefix>.png", so that is the name reported to the user.
print("\nShortest path distribution of %s is in: %s\n" %
      (file, "diam." + plotFN1 + ".png"))

# 4) Components of the network:
print("Components of the network:\n")

# a) Fraction of nodes in the largest connected component
# The value is a fraction between 0 and 1; %d printed it as 0.
nodeFrac = snap.GetMxSccSz(UGraph)
print("Fraction of nodes in largest connected component in '%s': %.4f\n" %
      (file, nodeFrac))

# b) Plot of the distribution of sizes of connected components.
plotFN2 = file + ".scc.connected-components-plot.png"
snap.PlotSccDistr(UGraph, plotFN2,
                  "Undirected graph - scc distribution for file " + file)
# As above, the image is written as "scc.<prefix>.png".
print("\nComponent size distribution of %s is in: %s\n" %
      (file, "scc." + plotFN2 + ".png"))

# end of program
print("\n\t End of program\n\n")