def calculate_shortest_path_lengths_distribution(graph, hashtag):
    """Plot the shortest-path-length distribution of ``graph`` and time it.

    Args:
        graph: SNAP graph handed unchanged to ``snap.PlotShortPathDistr``.
        hashtag: String used to build the plot's file-name prefix,
            ``hashtag + "_shortestPathLengthsDist"``.

    Prints a start message and, once plotting is done, the elapsed
    wall-clock time truncated to whole seconds.
    """
    started_at = time.time()
    print("Calculating shortest path lengths distribution...")

    output_prefix = hashtag + "_shortestPathLengthsDist"
    snap.PlotShortPathDistr(graph, output_prefix,
                            "Shortest Path Lengths Distribution")

    # Truncate to whole seconds so the timedelta prints as H:MM:SS.
    elapsed_seconds = int(time.time() - started_at)
    print("Completed in: %s" % timedelta(seconds=elapsed_seconds))
def shortest_path_distribution_plot(G):
    """Save the shortest path distribution plot of the subgraph ``G``.

    The subgraph name is taken from the last command-line argument
    (``sys.argv[-1]``). The plot ends up in
    ``./plots/shortest_path_<subgraph_name>.png``; the ``diam.<name>.plt``
    and ``diam.<name>.tab`` files left next to the plot are deleted.

    Args:
        G: SNAP graph passed to ``snap.PlotShortPathDistr``.

    Raises:
        OSError: if ``./plots`` cannot be created, or if the files expected
            from the plotting call cannot be moved or removed.
    """
    name = sys.argv[-1]
    snap.PlotShortPathDistr(G, name, f"Shortest Path Distribution in {name}")

    # exist_ok tolerates only "already exists"; permission problems and the
    # like now surface here instead of as a confusing rename failure below.
    os.makedirs('./plots', exist_ok=True)

    os.rename(f'diam.{name}.png', f'./plots/shortest_path_{name}.png')
    for leftover_ext in ('plt', 'tab'):
        os.remove(f'diam.{name}.{leftover_ext}')
def genGraphInfo(self):
    """Print basic statistics of ``self.G`` and generate its SNAP plots.

    Reports node and edge counts, every node id sharing the highest
    out-degree, the largest-component fraction from ``snap.GetMxSccSz``,
    and the names of the degree, shortest-path and component-size plots.
    ``self.graphName`` is used both in the messages and as the plot
    file-name prefix.
    """
    graph = self.G
    graphName = self.graphName

    # Size of the graph.
    print("Number of nodes in %s: %d" % (graphName, graph.GetNodes()))
    print("Number of edges in %s: %d" % (graphName, graph.GetEdges()))

    # SNAP names one node of maximum out-degree; read its degree, then
    # collect every node that ties with it.
    topNodeId = snap.GetMxOutDegNId(graph)
    maxDegree = next(
        (node.GetOutDeg() for node in graph.Nodes()
         if node.GetId() == topNodeId),
        -1)
    # Each id is followed by a comma, so the text ends with a trailing comma.
    tiedNodeIds = "".join(
        str(node.GetId()) + ","
        for node in graph.Nodes()
        if node.GetOutDeg() == maxDegree)
    print("Node id(s) with highest degree in %s: %s" % (graphName,
                                                        tiedNodeIds))

    # Degree distribution plot.
    snap.PlotOutDegDistr(graph, graphName, "Degree Distribution")
    print("Degree distribution of %s is in: %s" % (
        graphName, "outDeg." + graphName + ".png"))

    # Shortest path distribution plot.
    snap.PlotShortPathDistr(graph, graphName, "Shortest Path Distribution")
    print("Shortest path distribution of %s is in: %s" % (
        graphName, "diam." + graphName + ".png"))

    # Fraction of nodes in the largest connected component.
    print("Fraction of nodes in largest connected component in %s: %f" % (
        graphName, snap.GetMxSccSz(graph)))

    # Component size distribution plot.
    snap.PlotSccDistr(graph, graphName, "Component size distribution")
    print("Component size distribution of %s is in: %s" % (
        graphName, "scc." + graphName + ".png"))
def _report_sampled_diameters(G, label, kind, measure):
    """Print ``measure`` estimates for 10/100/1000 samples, then mean/variance."""
    estimates = []
    for sample_size in (10, 100, 1000):
        estimate = measure(G, sample_size, False)
        estimates.append(estimate)
        print("Approximate {0} diameter in {1} with sampling {2} nodes: {3}"
              .format(kind, label, sample_size, estimate))
    values = np.array(estimates)
    print("Approximate {0} diameter in {1} with sampling nodes "
          "(mean and variance): {2}, {3}".format(
              kind, label, np.mean(values), np.var(values)))


def solve_shortest_path_based_questions(G, GName):
    """Print diameter estimates for ``G`` and plot its shortest-path distribution.

    For both the full diameter (``snap.GetBfsFullDiam``) and the effective
    diameter (``snap.GetBfsEffDiam``) the estimate is computed with 10, 100
    and 1000 sampled nodes and printed, followed by the mean and the
    (population, ``np.var``) variance of the three estimates. Finally the
    shortest-path distribution is plotted and the plot file name printed.

    Args:
        G: SNAP graph to analyse.
        GName: Graph file name. Its last 10 characters are dropped to form
            the label used in every message and as the plot prefix
            (presumably a fixed-length file suffix -- confirm with caller).
    """
    # Every line reports the same label. The original summary lines sliced
    # GName[:10] (the first ten characters) by mistake.
    label = GName[:-10]

    _report_sampled_diameters(G, label, "full", snap.GetBfsFullDiam)
    # The original labelled this summary "full diameter" as well.
    _report_sampled_diameters(G, label, "Effective", snap.GetBfsEffDiam)

    snap.PlotShortPathDistr(G, label, label + " - shortest path")
    filename = "diam." + label + ".png"
    print("Shortest path distribution of {0} is in: {1}".format(
        label, filename))
def main():
    """Print structural statistics of a subgraph and save its SNAP plots.

    The edge list named by ``sys.argv[1]`` is loaded from ``./subgraphs`` as
    an undirected graph. The function then prints size and degree figures,
    full and effective diameter estimates for 10/100/1000 sampled nodes,
    component figures, and clustering/triad figures. Finally it writes the
    degree, shortest-path, component-size and clustering-coefficient plots
    into ``./plots``, removes every non-PNG entry from that directory and
    renames the SNAP outputs (``<kind>.<name>.png``) to their final names.

    The working directory is changed temporarily and always restored.
    """
    # SNAP output prefix -> prefix of the final PNG file name.
    plotPrefixes = {
        "ccf": "clustering_coeff_",
        "outDeg": "deg_dist_",
        "diam": "shortest_path_",
        "scc": "connected_comp_",
    }
    sampleSizes = (10, 100, 1000)

    parentDir = os.getcwd()
    os.chdir(parentDir + "/subgraphs")
    try:
        sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    finally:
        # Restore the working directory even when loading fails.
        os.chdir(parentDir)
    subGraphName = sys.argv[1].split(".")[0]

    #### 1 ########
    printWithOutNewLine("Number of nodes:", sub_graph.GetNodes())
    printWithOutNewLine("Number of edges:",
                        snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))
    # SNAP names one node of maximum degree; read its degree from the
    # (node id, degree) pairs and collect every node that ties with it.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = next(
        (pair.GetVal2() for pair in nodeDegPairs
         if pair.GetVal1() == rndMaxDegNId),
        0)
    maxDegNodes = [pair.GetVal1() for pair in nodeDegPairs
                   if pair.GetVal2() == maxDegVal]
    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sampledFullDiam = [snap.GetBfsFullDiam(sub_graph, size, False)
                       for size in sampleSizes]
    sampledFullDiamStats = [
        round(statistics.mean(sampledFullDiam), 4),
        round(statistics.variance(sampledFullDiam), 4),
    ]
    for size, diam in zip(sampleSizes, sampledFullDiam):
        printWithOutNewLine(
            "Approximate full diameter by sampling %d nodes:" % size, diam)
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    # Effective diameters are rounded before the statistics are taken,
    # exactly as they are reported.
    sampledEffDiam = [round(snap.GetBfsEffDiam(sub_graph, size, False), 4)
                      for size in sampleSizes]
    sampledEffDiamStats = [
        round(statistics.mean(sampledEffDiam), 4),
        round(statistics.variance(sampledEffDiam), 4),
    ]
    for size, diam in zip(sampleSizes, sampledEffDiam):
        printWithOutNewLine(
            "Approximate effective diameter by sampling %d nodes:" % size,
            diam)
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))
    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))
    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)
    print("Clustering coefficient of random node {}: {}".format(
        randomNodeId, round(nodeIdCcfMap[randomNodeId], 4)))
    print("Number of triads random node {} participates: {}".format(
        randomNodeId, snap.GetNodeTriads(sub_graph, randomNodeId)))
    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    plotsDir = os.path.join(parentDir, "plots")
    os.makedirs(plotsDir, exist_ok=True)
    # SNAP writes its plot files into the current working directory.
    os.chdir(plotsDir)
    try:
        snap.PlotOutDegDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Degree Distribution")
        snap.PlotShortPathDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Shortest Path Lengths Distribution")
        snap.PlotSccDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Connected Components Size Distribution")
        snap.PlotClustCf(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Clustering Coefficient Distribution")

        # Keep only the PNG images; everything else in ./plots is removed.
        for entry in os.listdir(plotsDir):
            if not entry.endswith(".png"):
                os.remove(os.path.join(plotsDir, entry))

        # Rename "<kind>.<name>.png" to its final name. Files that are
        # already renamed, or whose prefix is not a known SNAP plot kind,
        # are left alone. The original reused the previous iteration's
        # prefix (or the placeholder "filename") for unknown files.
        for entry in os.listdir(plotsDir):
            parts = entry.split(".")
            if len(parts) != 3 or parts[0] not in plotPrefixes:
                continue
            os.rename(entry,
                      plotPrefixes[parts[0]] + parts[1] + "." + parts[2])
    finally:
        os.chdir(parentDir)
# Effective diameter estimated with 10, 100 and 1000 sampled nodes.
diameter = []
for i in (10, 100, 1000):
    diameter.append(snap.GetBfsEffDiam(Graph1, i, False))
    print("%s %s %s %s" % (
        "Approx. effective diameter in " + input_file + " with sampling ",
        i, " nodes: ", round(diameter[-1], 3)))

# Mean and sample variance (n - 1 = 2 in the denominator) of the estimates.
mean = sum(diameter) / 3.0
variance = sum((estimate - mean) ** 2 for estimate in diameter) / 2.0
print("%s %s %s %s" % (
    "Approx. effective diameter in " + input_file + " (mean and variance): ",
    round(mean, 3), ", ", round(variance, 3)))

# Shortest path distribution plot.
snap.PlotShortPathDistr(Graph1, "shortest_path_plot_" + input_file,
                        "Undirected graph - shortest path", 1000)
print("Shortest path distribution of " + input_file +
      " is in: diam.shortest_path_plot_" + input_file + ".png")

# Size of the largest component returned by GetSccs, as a fraction of
# final_nodes.
largest_component = snap.TCnComV()
snap.GetSccs(Graph1, largest_component)
largest = max([item.Len() for item in largest_component] or [0.0])
print("")
print("%s %s" % (
    "Fraction of nodes in largest connected component in " + input_file +
    ": ", float(largest) / float(final_nodes)))
def ShortPath():
    """Plot the shortest-path distribution of the module-level ``Graph``.

    Returns whatever ``snap.PlotShortPathDistr`` returns; the plot uses the
    file-name prefix ``ScaleFreeShortestPath``.
    """
    output_prefix = "ScaleFreeShortestPath"
    plot_title = "Undirected graph - shortest path"
    return snap.PlotShortPathDistr(Graph, output_prefix, plot_title)
value_new = [ snap.GetBfsEffDiam(p2p_gnutella04_subgraph, 10, v4, True)[0], snap.GetBfsEffDiam(p2p_gnutella04_subgraph, 100, v4, True)[0], snap.GetBfsEffDiam(p2p_gnutella04_subgraph, 1000, v4, True)[0] ] print "Approximate Effective diameter in p2p-Gnutella04-subgraph with sampling nodes(mean and variance):" + str( round(statistics.mean(value_new), 3)) + "," + str( round(statistics.variance(value_new), 4)) # Task 1.2.3.3 if (sub_graph_name == "soc-Epinions1-subgraph"): # Plotting the distribution of shortest Length snap.PlotShortPathDistr(soc_epinions1_subgraph, "soc-Epinions1-subgraph", "Undirected graph - shortest path") print "Shortest path distribution of soc-Epinions1-subgraph is in :" + "diam.soc-Epinions1-subgraph.png" if (sub_graph_name == "cit-HepPh-subgraph"): # Plotting the distribution of shortest Length snap.PlotShortPathDistr(cit_heph_subgraph, "cit-HepPh-subgraph", "Undirected graph - shortest path") print "Shortest path distribution of cit-HepPh-subgraph is in :" + "diam.cit-HepPh-subgraph.png" if (sub_graph_name == "email-Enron-subgraph"): # Plotting the distribution of shortest Length snap.PlotShortPathDistr(email_enron_subgraph, "email-Enron-subgraph", "Undirected graph - shortest path") print "Shortest path distribution of email-Enron-subgraph is in :" + "diam.email-Enron-subgraph.png" if (sub_graph_name == "p2p-Gnutella04-subgraph"): # Plotting the distribution of shortest Length
import snap

# Size of every random G(n, m) graph generated below.
NODE_COUNT = 100
EDGE_COUNT = 1000

# Directed graph: shortest-path plot plus a Graphviz "dot" drawing.
Graph = snap.GenRndGnm(snap.PNGraph, NODE_COUNT, EDGE_COUNT)
snap.PlotShortPathDistr(Graph, "example", "Directed graph - shortest path")
snap.DrawGViz(Graph, snap.gvlDot, "graph.png", "graph 1")

# Undirected graph: shortest-path plot plus a Graphviz "neato" drawing.
UGraph = snap.GenRndGnm(snap.PUNGraph, NODE_COUNT, EDGE_COUNT)
snap.PlotShortPathDistr(UGraph, "example",
                        "Undirected graph - shortest path")
snap.DrawGViz(UGraph, snap.gvlNeato, "graph_undirected.png", "graph 2", True)

# Network: shortest-path plot only.
# NOTE(review): all three plots share the prefix "example", so they appear
# to target the same output files -- confirm this is intended.
Network = snap.GenRndGnm(snap.PNEANet, NODE_COUNT, EDGE_COUNT)
snap.PlotShortPathDistr(Network, "example", "Network - shortest path")
# Summarise the effective-diameter estimates obtained with 10, 100 and 1000
# sampled nodes (effDiam10/100/1000 are computed before this point).
effData = [effDiam10, effDiam100, effDiam1000]
em = mean(effData)
ev = variance(effData)

# These are real-valued quantities; "%d" would truncate them to integers.
for sampleSize, estimate in zip((10, 100, 1000), effData):
    print("Approx. effective diameter in %s with sampling %d nodes: %.4f" % (
        file, sampleSize, estimate))
print("Approx. effective diameter in %s (mean and variance): %.4f, %.4f\n" % (
    file, em, ev))

# c) Plot of the distribution of the shortest path
# plotFN1 is only the file-name *prefix* given to SNAP, which stores the
# image as "diam.<prefix>.png". Report the name of the file actually written.
plotFN1 = file + ".diam.short-path-plot.png"
snap.PlotShortPathDistr(UGraph, plotFN1,
                        "Undirected graph - Shortest path for file " + file)
shortPathPlotFile = "diam." + plotFN1 + ".png"
print("\nShortest path distribution of %s is in: %s\n" % (
    file, shortPathPlotFile))

# 4) Components of the network:
print("Components of the network:\n")

# a) Fraction of nodes in the largest connected component
# The value is a fraction, so "%d" would only ever print 0 or 1.
nodeFrac = snap.GetMxSccSz(UGraph)
print("Fraction of nodes in largest connected component in '%s': %.4f\n" % (
    file, nodeFrac))

# b) Plot of the distribution of sizes of connected components.
# As above, SNAP stores the image as "scc.<prefix>.png".
plotFN2 = file + ".scc.connected-components-plot.png"
snap.PlotSccDistr(UGraph, plotFN2,
                  "Undirected graph - scc distribution for file " + file)
componentPlotFile = "scc." + plotFN2 + ".png"
print("\nComponent size distribution of %s is in: %s\n" % (
    file, componentPlotFile))

# end of program
# Effective-diameter estimates for 100 and 1000 sampled nodes; eff1 (the
# 10-node estimate) is defined before this point.
eff2 = snap.GetBfsEffDiam(Graph1, 100, False)
eff3 = snap.GetBfsEffDiam(Graph1, 1000, False)
for sample_size, estimate in ((10, eff1), (100, eff2), (1000, eff3)):
    print("Approximate effective diameter by sampling ", sample_size,
          " nodes: %0.4f" % estimate)

# Population variance computed as E[x^2] - (E[x])^2.
effmean = (eff1 + eff2 + eff3) / 3.0
mean_of_squares = ((eff1 * eff1) + (eff2 * eff2) + (eff3 * eff3)) / 3.0
effvar = mean_of_squares - (effmean * effmean)
print("Approximate effective diameter (mean and variance): %0.4f,%0.4f" %
      (effmean, effvar))

str1 = 'shortest_path_' + file_name
snap.PlotShortPathDistr(Graph1, str1, "Distribution of shortest path lengths")

#4.Components of the network
fraction = snap.GetMxSccSz(Graph1)
print("Fraction of nodes in largest connected component: %0.4f" % fraction)

# Edge bridges are collected into a vector of node-id pairs.
V_edges = snap.TIntPrV()
snap.GetEdgeBridges(Graph1, V_edges)
edge_bridges = V_edges.Len()
print("Number of edge bridges: ", edge_bridges)

# Articulation points are collected into a vector of node ids.
Art_points = snap.TIntV()
snap.GetArtPoints(Graph1, Art_points)
art = Art_points.Len()
print("Number of articulation points: ", art)
# Articulation points (ArtNIdV is created before this point).
snap.GetArtPoints(G, ArtNIdV)
print("Number of articulation points:", len(ArtNIdV))

# Clustering and triads of the whole graph.
print("Average clustering coefficient: %.4f" % snap.GetClustCf(G, -1))
print("Number of triads:", snap.GetTriads(G, -1))

# A separate random node is drawn for each of the two per-node figures.
Ran_n = G.GetRndNId(Rnd)
node_clustering = snap.GetNodeClustCf(G, Ran_n)
print("Clustering coefficient of random node %d: %.4f" %
      (Ran_n, node_clustering))
Ran_n = G.GetRndNId(Rnd)
node_triads = snap.GetNodeTriads(G, Ran_n)
print("Number of triads random node %d participates: %d" %
      (Ran_n, node_triads))
print("Number of edges that participate in at least one triad:",
      snap.GetTriadEdges(G))

# Each entry: (plot function, tag prepended to sys.argv[1] to form the plot
# prefix, plot title, leading part of the generated file name, prefix of
# the final file name inside "plots").
plot_jobs = (
    (snap.PlotInDegDistr, "D_", "Degree Distribution",
     "inDeg", "deg_dist_"),
    (snap.PlotShortPathDistr, "S_", "Shortest path Distribution",
     "diam", "shortest_path_"),
    (snap.PlotSccDistr, "C_", "Component Size Distribution",
     "scc", "connected_comp_"),
    (snap.PlotClustCf, "C_", "Clustering Coefficient Distribution",
     "ccf", "clustering_coeff_"),
)
for plot_fn, tag, title, generated_kind, final_prefix in plot_jobs:
    plot_fn(G, tag + sys.argv[1], title)
    MoveFile(
        os.path.join(dirname,
                     generated_kind + "." + tag + sys.argv[1] + ".png"),
        os.path.join(dirname, "plots",
                     final_prefix + sys.argv[1] + ".png"))
print("Approximate full diameter by sampling {} nodes: {}".format( numNodes[i], fullDia[i])) print( "Approximate full diameter (mean and variance): {:.4f} {:.4f}".format( np.mean(fullDia), np.var(fullDia))) ## Effective Diameter effDia = [sn.GetBfsEffDiam(graph, tNodes) for tNodes in numNodes] for i in range(3): print("Approximate effective diameter by sampling {} nodes: {:.4f}". format(numNodes[i], effDia[i])) print("Approximate effective diameter (mean and variance): {:.4f} {:.4f}". format(np.mean(effDia), np.var(effDia))) ## Plot Shortest Path Distr sn.PlotShortPathDistr(graph, name, "Shortest Path Distribution") plotRemove("diam", "shortest_path", name) #Question 4 ## Max Comp Fraction MxConCompSize = sn.GetMxScc(graph).GetNodes() print("Fraction of nodes in largest connected component: {:0.4f}".format( MxConCompSize / graph.GetNodes())) ## Edge Bridges edgeBridge = sn.TIntPrV() sn.GetEdgeBridges(graph, edgeBridge) print("Number of edge bridges: {}".format(len(edgeBridge))) ## Articulation Points
y.append(results[key]) inds = np.argsort(x) x2 = [] y2 = [] for ind in inds: x2.append(x[ind]) y2.append(y[ind]) plt.loglog(x2, y2, color=color, label=label) plt.show() G1, id2, synset2, _, _, _ = generate_word_graph(True, False, False) print(G1.GetNodes()) G2, id2, synset2, _, _, _ = generate_word_graph(False, True, False) G3, id2, synset2, _, _, _ = generate_word_graph(False, False, True) snap.PlotShortPathDistr(G1, "hyp", "graph - shortest path", 1000) snap.PlotShortPathDistr(G2, "poly", "graph - shortest path", 1000) snap.PlotShortPathDistr(G3, "mero", "graph - shortest path", 1000) make_log_degree_graph([(G1, "hypernym", "b"), (G2, "polysemy", "y"), (G3, "meronymy", "r")]) print(meme) G2, id2, synset2, _, _, _ = generate_meaning_graph(True, False, False) print(G2.GetNodes()) print(G2.GetEdges()) GW = snap.GetMxScc(G2) print(GW.GetNodes()) print(GW.GetNodes(), "lolhyp") G3, id2, synset2, _, _, _ = generate_meaning_graph(False, False, True)
def graphStructure(elistName, elistPath):
    """
    Compute the graph properties required by the assignment.

    Args:
        elistName (str) -> Name of the edge list, used in plot file names
        elistPath (pathlib.Path) -> Edge list the undirected graph is built from

    Return:
        RESULTS (dict) -> Results for the different subparts of the assignment
    """

    def sampledDiameters(measure):
        """Return {10/100/1000: estimate} plus 'mean' and 'variance'."""
        estimates = {size: measure(subGraph, size, False)
                     for size in (10, 100, 1000)}
        mean, variance = meanVariance(estimates.values())
        estimates['mean'] = mean
        estimates['variance'] = variance
        return estimates

    subGraph = snap.LoadEdgeList(snap.PUNGraph, elistPath, 0, 1)

    # Part 1 (Size of the network)
    RESULTS = {
        'nodeCount': subGraph.GetNodes(),
        'edgeCount': subGraph.GetEdges(),
    }

    # Part 2 (Degree of nodes in the network)
    degrees = [(node.GetId(), node.GetDeg()) for node in subGraph.Nodes()]
    degree7Count = sum(1 for _, degree in degrees if degree == 7)
    # The leading 0 keeps the result at 0 for a graph without nodes.
    maxDegree = max([0] + [degree for _, degree in degrees])
    maxDegreeNodes = [nodeId for nodeId, degree in degrees
                      if degree == maxDegree]

    # Since it is an undirected graph, in/out degree is unimportant
    snap.PlotOutDegDistr(subGraph, f"deg_dist_{elistName}")

    RESULTS['maxDegree'] = maxDegree
    RESULTS['maxDegreeNodes'] = ','.join(str(nodeId)
                                         for nodeId in maxDegreeNodes)
    RESULTS['degree7Count'] = degree7Count

    # Part 3 (Paths in the network)
    RESULTS['fullDiameters'] = sampledDiameters(snap.GetBfsFullDiam)
    RESULTS['effDiameters'] = sampledDiameters(snap.GetBfsEffDiam)
    snap.PlotShortPathDistr(subGraph, f"shortest_path_{elistName}")

    # Part 4 (Components of the network)
    edgeBridges = snap.TIntPrV()
    articulationPoints = snap.TIntV()
    RESULTS['fractionLargestConnected'] = snap.GetMxSccSz(subGraph)
    snap.GetEdgeBridges(subGraph, edgeBridges)
    snap.GetArtPoints(subGraph, articulationPoints)
    RESULTS['edgeBridges'] = len(edgeBridges)
    RESULTS['articulationPoints'] = len(articulationPoints)
    snap.PlotSccDistr(subGraph, f"connected_comp_{elistName}")

    # Part 5 (Connectivity and clustering in the network)
    RESULTS['avgClusterCoefficient'] = snap.GetClustCf(subGraph, -1)
    RESULTS['triadCount'] = snap.GetTriadsAll(subGraph, -1)[0]
    # Both random nodes are drawn before either one is used.
    nodeX = subGraph.GetRndNId(Rnd)
    nodeY = subGraph.GetRndNId(Rnd)
    RESULTS['randomClusterCoefficient'] = (
        nodeX, snap.GetNodeClustCf(subGraph, nodeX))
    RESULTS['randomNodeTriads'] = (
        nodeY, snap.GetNodeTriads(subGraph, nodeY))
    RESULTS['edgesTriads'] = snap.GetTriadEdges(subGraph)
    snap.PlotClustCf(subGraph, f"clustering_coeff_{elistName}")

    return RESULTS
def _store_plot(kind, target_name):
    """Move ``<kind>.temp.png`` to ``plots/<target_name>``; drop other ``<kind>.*`` files."""
    os.makedirs("plots", exist_ok=True)
    os.replace(kind + ".temp.png", os.path.join("plots", target_name))
    for leftover in os.listdir("."):
        if leftover.startswith(kind + ".") and os.path.isfile(leftover):
            os.remove(leftover)


# Effective diameter: the 10-node estimate is already the last element of
# effective_diameter when this point is reached.
print("Approximate effective diameter by sampling 10 nodes:",
      round(effective_diameter[-1], 4))
effective_diameter.append(snap.GetBfsEffDiam(graph, 100))
print("Approximate effective diameter by sampling 100 nodes:",
      round(effective_diameter[-1], 4))
effective_diameter.append(snap.GetBfsEffDiam(graph, 1000))
print("Approximate effective diameter by sampling 1000 nodes:",
      round(effective_diameter[-1], 4))
# sep="" keeps "mean,variance" free of spaces, so the space after the colon
# has to be part of the label. The original omitted it.
print("Approximate effective diameter (mean and variance): ",
      round(get_mean(effective_diameter), 4), ',',
      round(get_variance(effective_diameter), 4), sep="")

# Shortest path distribution plot. Files are moved and deleted through the
# os module rather than a shell, so subgraph_name is never interpreted by a
# shell and failures raise instead of being ignored.
snap.PlotShortPathDistr(graph, "temp", "Undirected graph - shortest path")
_store_plot("diam", "shortest_path_" + subgraph_name + ".png")

print("Fraction of nodes in largest connected component:",
      round(snap.GetMxSccSz(graph), 4))
print("Number of edge bridges:", get_bridges(graph).Len())
print("Number of articulation points:",
      get_articulation_points(graph).Len())

# Component size distribution plot.
snap.PlotSccDistr(graph, "temp", "Undirected graph - scc distribution")
_store_plot("scc", "connected_comp_" + subgraph_name + ".png")

print("Average clustering coefficient:", round(snap.GetClustCf(graph), 4))
print("Number of triads:", snap.GetTriads(graph))
# # 3a
# Full diameter estimated with 10, 100 and 1000 sampled nodes.
full_diam_list = []
for no_nodes in (10, 100, 1000):
    full_diam = snap.GetBfsFullDiam(Fb_graph, no_nodes, False)
    full_diam_list.append(full_diam)
    print("Approximate full diameter by sampling {} nodes: {}".format(
        no_nodes, round(full_diam, 4)))

# Mean and population variance of the three estimates.
mean = sum(full_diam_list) / len(full_diam_list)
res = sum((value - mean)**2 for value in full_diam_list) / len(full_diam_list)
print("Approximate full diameter (mean and variance): {},{}".format(
    round(mean, 4), round(res, 4)))

# Effective diameter, reported the same way.
eff_diam_list = []
for no_nodes in (10, 100, 1000):
    eff_diam = snap.GetBfsEffDiam(Fb_graph, no_nodes, False)
    eff_diam_list.append(eff_diam)
    print("Approximate effective diameter by sampling {} nodes: {}".format(
        no_nodes, round(eff_diam, 4)))

mean = sum(eff_diam_list) / len(eff_diam_list)
res = sum((value - mean)**2 for value in eff_diam_list) / len(eff_diam_list)
print("Approximate effective diameter (mean and variance): {},{}".format(
    round(mean, 4), round(res, 4)))

snap.PlotShortPathDistr(Fb_graph, "exa", "Directed graph - shortest path")
i = 10 average = 0.0 variance = 0.0 while (i <= 1000): diam = snap.GetBfsEffDiam(fbsgel, i, False) print("Approximate effective diameter by sampling", i, "nodes:", round(diam, 4)) i *= 10 average += diam variance += (diam * diam) average /= 3 variance = (variance / 3) - average * average print("Approximate effective diameter(mean and variance): %0.4f,%0.4f" % (average, variance)) #c Plot snap.PlotShortPathDistr(fbsgel, "shortest_path_" + str(subgraph_name), "shortest_path_" + str(subgraph_name)) #Q4 #a print("Fraction of nodes in largest connected component:", round(snap.GetMxSccSz(fbsgel), 4)) #b EdgeBridgeV = snap.TIntPrV() snap.GetEdgeBridges(fbsgel, EdgeBridgeV) print("Number of edge bridges:", len(EdgeBridgeV)) #c ArtNIdV = snap.TIntV() snap.GetArtPoints(fbsgel, ArtNIdV) print("Number of articulation points:", len(ArtNIdV)) #d Plot snap.PlotSccDistr(fbsgel, "connected_comp_" + str(subgraph_name),
def main(argv):
    """Print structural statistics for the edge list named in ``argv``.

    ``argv`` must hold exactly one element, the path of the edge list;
    otherwise a usage line is printed and the program exits with status 0.
    The graph is analysed with networkx (size, degrees, 2-hop neighbourhoods
    of degree-1 nodes, diameter, connected components of the graph and of
    its complement) and with SNAP (sampled full/effective diameters and the
    shortest-path distribution plot).
    """
    if len(argv) != 1:
        print("usage: python gen-structure.py <path/to/edgelist>")
        sys.exit(0)

    # Q0. Uncomment to generate random5000by6.txt edge list.
    # generate_graph_nx(5000)

    graph_file_path = argv[0]
    graph_file_name = graph_file_path.split('/')[-1]
    print("Current file: {}".format(graph_file_name))

    g_nx = nx.read_weighted_edgelist(graph_file_path)

    # Q1.a / Q1.b. Size of the graph.
    print("Number of nodes in {}: {}".format(graph_file_name,
                                             g_nx.number_of_nodes()))
    print("Number of edges in {}: {}".format(graph_file_name,
                                             g_nx.number_of_edges()))

    # Q2.a. Nodes of degree 1 (degree_dict is used as a node -> degree map).
    degree_dict = nx.degree(g_nx)
    nodes_with_degree_1 = [node for node in degree_dict.keys()
                           if degree_dict[node] == 1]
    print("Number of nodes with degree = 1 in {}: {}".format(
        graph_file_name, len(nodes_with_degree_1)))

    # Q2.b. Maximum degree (0 when there are no nodes) and every node that
    # reaches it.
    max_degree = max([0] + list(degree_dict.values()))
    nodes_with_max_degree = [node for node, degree in degree_dict.items()
                             if degree == max_degree]
    print("Max Degree is {}".format(max_degree))
    print("Node id(s) with highest degree in {}: {}".format(
        graph_file_name,
        ", ".join(str(node) for node in nodes_with_max_degree)))

    # Q2.c. Average degree over the neighbours of each degree-1 node's
    # single neighbour.
    for node in nodes_with_degree_1:
        neighbors = g_nx.neighbors(node)  # nodes in 1 hop. Should just be 1.
        if len(neighbors) > 1:  # Sanity check
            print("Not a node with degree 1!!!")
            continue
        second_hop = g_nx.neighbors(neighbors[0])
        degree_total = sum(g_nx.degree(other) for other in second_hop)
        avg_degree_n2 = float(degree_total) / len(second_hop)
        print("The average degree of {}'s 2-hop neighborhood is: {}".format(
            node, avg_degree_n2))

    # Using snap for plots.
    g_snap = snap.LoadEdgeList(snap.PUNGraph, graph_file_path)

    # Q2.d Plot the degree distribution
    plot_degree_distribution(g_nx, graph_file_name)
    print("Degree distribution of {} is in: {}".format(
        graph_file_name, graph_file_name + "-degree_distribution.png"))

    # added for assignment 2
    print("Approx. diameter in {} ".format(nx.diameter(g_nx)))

    # Q3.a. Approximate full diameter (maximum shortest path length)
    full_diameters = []
    for sample_size in RANDOM_SIZE_LIST:
        estimate = snap.GetBfsFullDiam(g_snap, sample_size, False)
        full_diameters.append(estimate)
        print("Approx. diameter in {} with sampling {} nodes: {}".format(
            graph_file_name, sample_size, estimate))
    print("Approx. diameter in {} (mean and variance): {}, {}.".format(
        graph_file_name, numpy.mean(full_diameters),
        numpy.var(full_diameters)))

    # Q3.b. Effective Diameter
    effective_diameters = []
    for sample_size in RANDOM_SIZE_LIST:
        estimate = snap.GetBfsEffDiam(g_snap, sample_size, False)
        effective_diameters.append(estimate)
        print("Approx. effective diameter in {} with sampling {} nodes: {}"
              .format(graph_file_name, sample_size, estimate))
    print("Approx. effective diameter in {} (mean and variance): {}, {}."
          .format(graph_file_name, numpy.mean(effective_diameters),
                  numpy.var(effective_diameters)))

    # Q3.c. Plot distribution of shortest path lengths
    snap.PlotShortPathDistr(
        g_snap, graph_file_name + "-shortest_path_distribution",
        "Plot of the distribution of shortest path lengths")
    print("Shortest path distribution of {} is in: {}".format(
        graph_file_name,
        "diam." + graph_file_name + "-shortest_path_distribution.png"))

    # Q4.a. Fraction of nodes in the largest connected component.
    # Components are sorted largest first, so index 0 is the largest one.
    connected_components_gnx = sorted(nx.connected_components(g_nx),
                                      key=len, reverse=True)
    num_nodes_largest_comp_gnx = 0
    if connected_components_gnx:
        num_nodes_largest_comp_gnx = len(connected_components_gnx[0])
    frac_largest_comp_gnx = (float(num_nodes_largest_comp_gnx) /
                             g_nx.number_of_nodes())
    print("Fraction of nodes in largest connected component in {}: {}"
          .format(graph_file_name, frac_largest_comp_gnx))

    # Q4.b. Same fraction for the complement of the real graph.
    g_nx_c = nx.complement(g_nx, "g_nx_c")
    connected_components_gnxc = sorted(nx.connected_components(g_nx_c),
                                       key=len, reverse=True)
    num_nodes_largest_comp_gnxc = 0
    if connected_components_gnxc:
        num_nodes_largest_comp_gnxc = len(connected_components_gnxc[0])
    frac_largest_comp_gnxc = (float(num_nodes_largest_comp_gnxc) /
                              g_nx_c.number_of_nodes())
    print("Fraction of nodes in largest connected component in {}'s "
          "complement: {}".format(graph_file_name, frac_largest_comp_gnxc))

    # Q4.c. Plot of the distribution of sizes of connected components.
    plot_distribution_of_connected_components(connected_components_gnx,
                                              graph_file_name)
    print("Component size distribution of {} is in: {}".format(
        graph_file_name, graph_file_name + "-scc_distribution.png"))
    plot_distribution_of_connected_components(
        connected_components_gnxc, graph_file_name + "_complement")
    print("Component size distribution of the complement of {} is in: {}"
          .format(graph_file_name,
                  graph_file_name + "_complement-scc_distribution.png"))