def analyzeMisc(FNGraph):
    """Print miscellaneous statistics (LCC, clustering, distances) of a graph.

    Prints, in order: the size of the largest weakly connected component as a
    percentage, the clustering coefficient, the BFS diameter estimate (up to
    1432 start nodes, undirected) and the average shortest-path distance,
    followed by the elapsed wall-clock time.

    The average distance runs one shortest-path search per node, so the cost
    grows with the number of nodes times the graph size.

    :param FNGraph: SNAP graph to analyse.
    """
    t1 = time.time()
    print("Started calculating miscellaneous network statistics:")

    print('\tPercentage of nodes in LCC in Football network: %.3f'
          % (snap.GetMxWccSz(FNGraph) * 100.0))

    GraphClustCoeff = snap.GetClustCf(FNGraph, -1)
    print("\tClustering coefficient: %.3f" % GraphClustCoeff)

    diam = snap.GetBfsFullDiam(FNGraph, 1432, False)
    print("\tNetwork diameter: %.3f\n" % diam)

    print("\tCalculating average distance...")
    avgDist = 0
    iter1 = 0
    allNodes1 = FNGraph.GetNodes()
    # Number of possible destinations per source; 0 for a single-node graph,
    # in which case there are no pairs and the average distance stays 0.
    otherNodes = allNodes1 - 1
    for NI in FNGraph.Nodes():
        if iter1 % 100 == 0:
            print("\t\tCalculated for %d nodes" % iter1)
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(FNGraph, NI.GetId(), NIdToDistH)
        singleDistSum = sum(NIdToDistH[item] for item in NIdToDistH)
        if otherNodes > 0:
            # Guard avoids ZeroDivisionError when the graph has one node.
            avgDist += (1.0 / allNodes1) * float(singleDistSum) / otherNodes
        iter1 += 1

    print("\tNetwork average distance: %.3f" % avgDist)
    print("\nFinished calculating in %f seconds\n" % (time.time() - t1))
def get_diameter(ei_graph):
    """Approximate the diameter of the SNAP graph wrapped by ``ei_graph``.

    The BFS sample is capped at 100 start nodes (or the node count, whichever
    is smaller) and edges are treated as undirected.

    https://snap.stanford.edu/snappy/doc/reference/GetBfsFullDiam.html
    """
    sample_cap = 100
    n_start = min(ei_graph.base().GetNodes(), sample_cap)
    return snap.GetBfsFullDiam(ei_graph.base(), n_start, False)
def print_full_diameter(G):
    """Print BFS full-diameter estimates of ``G`` for 10, 100 and 1000 samples.

    All three estimates are computed first; then each is printed, followed by
    the mean and (population) variance of the three, rounded to 4 decimals.
    """
    sample_sizes = (10, 100, 1000)
    estimates = [snap.GetBfsFullDiam(G, size) for size in sample_sizes]

    values = np.array(estimates)
    mean = round(np.mean(values), 4)
    variance = round(np.var(values), 4)

    for size, estimate in zip(sample_sizes, estimates):
        print("Approximate full diameter by sampling %d nodes:" % size, estimate)
    print(f"Approximate full diameter (mean and variance): {mean},{variance}")
def diameter_of_the_network():
    """Print diameter figures for the module-level graph ``G``.

    The number of BFS start nodes is 75% of ``len(df)`` (truncated to int),
    where ``df`` is a module-level object defined outside this function.
    """
    n_test_nodes = int(len(df) * 0.75)  # use 75% of the data

    full_diam = snap.GetBfsFullDiam(G, n_test_nodes)
    print("Diameter", full_diam)

    eff_diam = snap.GetBfsEffDiam(G, n_test_nodes)
    print("Effective Diameter", eff_diam)

    # NOTE(review): element 3 of the result is presumably the average
    # shortest-path length -- confirm against the snap.py reference.
    summary = snap.GetBfsEffDiamAll(G, n_test_nodes, False)
    print("Average Diameter:", summary[3])
def print_info(graph):
    """Print per-node degrees followed by summary statistics of ``graph``.

    Each statistic is computed immediately before it is printed, in the order
    listed below.
    """
    node_line = "node: %d, out-degree %d, in-degree %d"
    for node in graph.Nodes():
        print(node_line % (node.GetId(), node.GetOutDeg(), node.GetInDeg()))

    # (label, thunk) pairs; thunks keep evaluation lazy and in print order.
    summary = (
        ("Number of nodes: ", lambda: graph.GetNodes()),
        ("Number of edges: ", lambda: graph.GetEdges()),
        ("Maximum degree: ",
         lambda: graph.GetNI(snap.GetMxDegNId(graph)).GetDeg()),
        ("Diameter (approximate): ", lambda: snap.GetBfsFullDiam(graph, 10)),
        ("Triangles: ", lambda: snap.GetTriads(graph)),
        ("Clustering coefficient: ", lambda: snap.GetClustCf(graph)),
    )
    for label, compute in summary:
        print(label, compute())
def printGenericInformation(graph, name):
    """Print a short summary of ``graph`` under the heading ``name``.

    Prints node count, edge count, average degree, an approximate diameter
    (10 BFS start nodes), the clustering coefficient and the triangle count.

    :param graph: SNAP graph to summarise.
    :param name: label used in the heading line.
    """
    node_count = graph.GetNodes()
    edge_count = graph.GetEdges()
    # Each edge adds one to an out-degree and one to an in-degree, so the
    # average of (in + out) degree is 2E/N, not E/N. An empty graph has no
    # degrees to average, so report 0.0 instead of dividing by zero.
    if node_count:
        average_degree = 2.0 * float(edge_count) / float(node_count)
    else:
        average_degree = 0.0

    print("Generic informations of %s" % name)
    print('Nodes', node_count)
    print('Edges', edge_count)
    print('Average degree (In+Out)', average_degree)
    print('Diameter', snap.GetBfsFullDiam(graph, 10))
    print('Clustering coefficient', snap.GetClustCf(graph))
    print('Triangles', snap.GetTriangleCnt(graph))
    print('---------------------------------------')
def diam_by_time(full_graph, min_year=2005, max_year=2014):
    """Plot the approximate diameter of each graph from ``generate_all_graphs``.

    Each generated item is a 3-tuple whose last element is the graph; the
    diameter is estimated from 40 BFS start nodes.

    NOTE(review): the x axis is labelled 'Year' but the values are plotted
    against their list index, not the calendar year -- confirm intent.
    """
    snapshots = generate_all_graphs(
        full_graph, min_year=min_year, max_year=max_year)
    diameters = [snap.GetBfsFullDiam(snapshot, 40)
                 for _users, _businesses, snapshot in snapshots]

    plt.plot(diameters)
    plt.xlabel('Year')
    plt.ylabel('Diameter')
    plt.show()
def solve_shortest_path_based_questions(G, GName):
    """Print diameter estimates for ``G`` and plot its shortest-path histogram.

    For sample sizes 10, 100 and 1000 the full and the effective BFS diameter
    are computed and printed, each group followed by its mean and (population)
    variance. Finally the shortest-path distribution is plotted.

    :param G: SNAP graph.
    :param GName: graph file name; its last 10 characters are dropped to form
        the label used in messages and in the plot file name.
    """
    label = GName[:-10]
    sample_sizes = (10, 100, 1000)

    full_diams = []
    for size in sample_sizes:
        diam = snap.GetBfsFullDiam(G, size, False)
        print("Approximate full diameter in {0} with sampling {1} nodes: {2}"
              .format(label, size, diam))
        full_diams.append(diam)
    temp = np.array(full_diams)
    # Fixed: the label was sliced with GName[:10] here, unlike every other line.
    print("Approximate full diameter in {0} with sampling nodes "
          "(mean and variance): {1}, {2}"
          .format(label, np.mean(temp), np.var(temp)))

    eff_diams = []
    for size in sample_sizes:
        diam = snap.GetBfsEffDiam(G, size, False)
        print("Approximate Effective diameter in {0} with sampling {1} nodes: {2}"
              .format(label, size, diam))
        eff_diams.append(diam)
    temp = np.array(eff_diams)
    # Fixed: this summary was mislabelled "full diameter" and used GName[:10].
    print("Approximate Effective diameter in {0} with sampling nodes "
          "(mean and variance): {1}, {2}"
          .format(label, np.mean(temp), np.var(temp)))

    snap.PlotShortPathDistr(G, label, label + " - shortest path")
    filename = "diam." + label + ".png"
    print("Shortest path distribution of {0} is in: {1}".format(label, filename))
def diameter(self, n_node=100, isDir=False):
    '''
    Return the approximate diameter ("longest shortest path") of the graph.

    The value is an estimate: BFS is started from a sample of nodes whose
    size is n_node, capped at the number of nodes in the graph.

    :param n_node: number of nodes to sample
    :param isDir: whether edges are treated as directed
    '''
    sample_size = min(self.num_nodes, n_node)
    return self.snap.GetBfsFullDiam(self.graph, sample_size, isDir)
def analyzeMisc(FNGraph):
    """Print miscellaneous statistics (LCC, clustering, distances) of a graph.

    Prints, in order: the size of the largest weakly connected component as a
    percentage, the clustering coefficient, the BFS diameter estimate (up to
    1432 start nodes, undirected) and the average shortest-path distance,
    followed by the elapsed wall-clock time. Per-node progress lines are only
    printed when ``utils.mode`` is ``'debug'``.

    :param FNGraph: SNAP graph to analyse.
    """
    tStart = time.time()
    print("[Network Analyzr] Started calculating miscellaneous network statistics...")

    LCCPercentage = snap.GetMxWccSz(FNGraph) * 100.0
    print('\t[Network Analyzr] Percentage of nodes in LCC: %.3f' % LCCPercentage)

    clusteringCoefficient = snap.GetClustCf(FNGraph, -1)
    print("\t[Network Analyzr] Clustering coefficient: %.3f" % clusteringCoefficient)

    diameter = snap.GetBfsFullDiam(FNGraph, 1432, False)
    print("\t[Network Analyzr] Network diameter: %.3f\n" % diameter)

    # Average distance
    print("\t[Network Analyzr] Calculating average distance...")
    i = 0
    avgDist = 0
    nodes = FNGraph.GetNodes()
    # Destinations per source; 0 for a single-node graph, which has no pairs.
    otherNodes = nodes - 1
    for sourceNode in FNGraph.Nodes():
        if i % 100 == 0 and utils.mode == 'debug':
            print("\t\tCalculated for %d nodes" % i)
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(FNGraph, sourceNode.GetId(), NIdToDistH)
        distanceSum = sum(NIdToDistH[destinationNode]
                          for destinationNode in NIdToDistH)
        if otherNodes > 0:
            # Guard avoids ZeroDivisionError when the graph has one node.
            avgDist += (1.0 / nodes) * float(distanceSum) / otherNodes
        i += 1

    print("\t[Network Analyzr] Network average distance: %.3f" % avgDist)
    timeSpent = time.time() - tStart
    print("\n[Network Analyzr] Finished calculating in %f seconds\n" % timeSpent)
def calculate(self):
    """
    Compute the network metrics with both networkx and snap.

    Runs ``calculate_basic`` first, then fills ``clustering_coeff``,
    ``transitivity``, ``num_triads``, ``diameter`` and ``avg_spl`` in that
    order, announcing each step on stdout.

    :return: self
    """
    self.calculate_basic()

    # (progress message, attribute to set, thunk producing the value)
    steps = (
        ("calculating clustering coefficient.", "clustering_coeff",
         lambda: snap.GetClustCf(self.g_snap, -1)),
        ("calculating transitivity", "transitivity",
         lambda: nx.transitivity(self.g_nx)),
        ("calculating triads", "num_triads",
         lambda: snap.GetTriads(self.g_snap, -1)),
        ("calculating diameter", "diameter",
         lambda: snap.GetBfsFullDiam(self.g_snap, 150, False)),
        ("calculating spl", "avg_spl",
         lambda: self.calculate_spl()),
    )
    for message, attribute, compute in steps:
        print(message)
        setattr(self, attribute, compute())
    return self
def test_snap(file_name):
    """Load ``../data/<file_name>.gr`` as an undirected graph and time SNAP calls.

    Prints the load time, node and edge counts, the maximum degree and the
    BFS diameter estimate (``N_BFS`` start nodes) with its elapsed time.
    The clustering-coefficient timing is currently disabled.
    """
    edge_list_path = "../data/" + file_name + ".gr"

    start = time.clock()
    g = sn.LoadEdgeList_PUNGraph(edge_list_path, 0, 1)
    print("elapsed  %s" % (time.clock() - start,))

    print("#nodes  %s" % (g.GetNodes(),))
    print("#edges  %s" % (g.GetEdges(),))

    degree_seq = sn.TIntV()
    sn.GetDegSeqV(g, degree_seq)
    max_deg = max(degree_seq)
    print("max_deg = %s" % (max_deg,))

    # BFS-based diameter estimate
    start = time.clock()
    s_Diam = sn.GetBfsFullDiam(g, N_BFS, False)
    print("s_Diam = %s" % (s_Diam,))
    print("elapsed  %s" % (time.clock() - start,))
def diameter(g):
    """Print and return the BFS diameter estimate of ``g`` (100 start nodes)."""
    estimate = snap.GetBfsFullDiam(g, 100)
    print('diameter, ', estimate)
    return estimate
import snap

# Build a random undirected G(n, m) graph with 1,134,890 nodes and
# 2,987,624 edges (no edge list is read from disk here).
N = snap.GenRndGnm(snap.PUNGraph, 1134890, 2987624)

# Number of nodes and edges
Nodes = N.GetNodes()
print("Nodes: {0:d}".format(Nodes))
Edges = N.GetEdges()
print("Edges: {0:d}".format(Edges))

# Diameter: every node is requested as a BFS start node, so this is the
# most expensive step of the script.
LongShortPathDiam = snap.GetBfsFullDiam(N, Nodes, False)
print("The Longest Shortest path (Diameter) of this youtube Network is {0:d}"
      .format(LongShortPathDiam))
def approx_neighborhood_function_statistics(G, n_nodes, n_approx=64,
                                            approx_type=APPROX_BFS_IGRAPH):
    """Estimate distance statistics of ``G`` via SNAP's neighbourhood function.

    :param G: networkx graph; converted with ``convert_networkx_to_SNAP``.
    :param n_nodes: unused by this function (kept for callers).
    :param n_approx: number of approximations passed to the ANF routines.
    :param approx_type: how the diameter lower bound ``s_Diam`` is obtained:
        ``APPROX_ANF``, ``APPROX_BFS`` or ``APPROX_BFS_IGRAPH``.
    :return: tuple ``(s_APD, s_EDiam, s_CL, s_Diam, s_APD_i, s_EDiam_i,
        s_CL_i, dist_list)``. The three ``*_i`` values come from
        ``bfs_samples`` and are ``None`` unless ``approx_type`` is
        ``APPROX_BFS_IGRAPH``.
    :raises ValueError: if ``approx_type`` is not one of the three modes.
    """
    aG = convert_networkx_to_SNAP(G)
    print("convert to SNAP graph: DONE")

    # The igraph-based values only exist in APPROX_BFS_IGRAPH mode. Defaulting
    # them keeps the return statement from raising NameError in other modes.
    s_APD_i = s_EDiam_i = s_CL_i = None

    # diameter s_Diam (lower bound)
    start = time.clock()
    if approx_type == APPROX_ANF:
        s_Diam = sn.GetAnfEffDiam(aG, False, 0.99, n_approx)
    elif approx_type == APPROX_BFS:
        s_Diam = sn.GetBfsFullDiam(aG, N_BFS, False)
    elif approx_type == APPROX_BFS_IGRAPH:
        s_APD_i, s_EDiam_i, s_CL_i, s_Diam = bfs_samples(G)  # _i: igraph
    else:
        # Previously this only printed and then crashed on the unbound s_Diam.
        raise ValueError("Wrong <approx_type> !")
    print("compute s_Diam, elapsed : %s" % (time.clock() - start,))

    # average distance s_APD
    DistNbrsV = sn.TIntFltKdV()
    MxDist = int(math.ceil(s_Diam))
    print("MxDist = %s" % (MxDist,))

    start = time.clock()
    sn.GetAnf(aG, DistNbrsV, MxDist, False, n_approx)  # n_approx=32, 64...
    print("GetAnf, elapsed : %s" % (time.clock() - start,))

    # list of [distance, value] pairs taken from the ANF result
    dist_list = [[item.Key(), item.Dat()] for item in DistNbrsV]
    num_APD = dist_list[-1][1]

    # effective diameter: first distance whose value reaches 90% of the last
    s_EDiam = 0
    for distance, reached in dist_list:
        if reached >= 0.9 * num_APD:
            s_EDiam = distance
            break

    # Turn the values into per-distance differences, walking backwards so
    # each subtraction still sees the untouched predecessor.
    # NOTE(review): the range stops before index 1 ("do not subtract [0] from
    # [1]"), so entry 1 is neither differenced nor added to sum_APD -- confirm
    # this is intended.
    sum_APD = 0.0
    for i in range(len(dist_list) - 1, 1, -1):
        dist_list[i][1] = dist_list[i][1] - dist_list[i - 1][1]
        sum_APD += dist_list[i][0] * dist_list[i][1]
    s_APD = sum_APD / num_APD
    print("num_APD = %s" % (num_APD,))

    # for s_PDD
    print("s_PDD :")
    d_list = [item[1] for item in dist_list]
    print(d_list)

    # connectivity length s_CL
    sum_CL = 0.0
    for item in dist_list:
        if item[0] > 0:
            sum_CL += item[1] / item[0]
    s_CL = num_APD / sum_CL

    return (s_APD, float(s_EDiam), s_CL, float(s_Diam), s_APD_i,
            None if s_EDiam_i is None else float(s_EDiam_i), s_CL_i,
            dist_list)
# Excerpt of a longer script: basic statistics and full-diameter estimates
# for the graph built from `file_name` (defined outside this excerpt).

# Name used for output files: the part of file_name before the first '.'.
subgraph_name = file_name.split('.')[0]
graph = make_snap_graph(list_of_edge_from_file(file_name))
print("Number of nodes:", graph.GetNodes())
print("Number of edges:", graph.GetEdges())
print("Number of nodes with degree=7:", snap.CntDegNodes(graph, 7))
print("Node id(s) with highest degree:", end=" ")
print(*nodes_with_highest_degree(graph), sep=",")
# Plot under the temporary name "temp", then move the PNG into plots/ and
# remove the remaining inDeg.* files left in the working directory.
# NOTE(review): the shell commands are built by string concatenation from
# file_name; confirm it can never contain shell metacharacters.
snap.PlotInDegDistr(graph, "temp", "Undirected graph - in-degree Distribution")
os.system("mv inDeg.temp.png plots/deg_dist_" + subgraph_name + ".png")
os.system("rm inDeg.*")
# Full-diameter estimates for 10, 100 and 1000 BFS start nodes; each value is
# printed right after it is appended.
full_diameter = []
full_diameter.append(snap.GetBfsFullDiam(graph, 10))
print("Approximate full diameter by sampling 10 nodes:", full_diameter[-1])
full_diameter.append(snap.GetBfsFullDiam(graph, 100))
print("Approximate full diameter by sampling 100 nodes:", full_diameter[-1])
full_diameter.append(snap.GetBfsFullDiam(graph, 1000))
print("Approximate full diameter by sampling 1000 nodes:", full_diameter[-1])
# Mean and variance come from helpers defined outside this excerpt.
print("Approximate full diameter (mean and variance): ", round(get_mean(full_diameter), 4), ',', round(get_variance(full_diameter), 4), sep="")
# Filled by code that follows this excerpt.
effective_diameter = []
if node.GetDeg() > max_deg: max_deg = node.GetDeg() #print("Max degree:",max_deg) for node in Graph1.Nodes(): if node.GetDeg() == max_deg: nodes_max.append(node.GetId()) nodesmaxstring = ','.join(map( str, nodes_max)) #converting list to comma separated string print("Node id(s) with highest degree: %s" % nodesmaxstring) str = "deg_dist_" + file_name snap.PlotInDegDistr(Graph1, str, "Degree Distribution") #3.Paths in the Network full1 = snap.GetBfsFullDiam(Graph1, 10, False) full2 = snap.GetBfsFullDiam(Graph1, 100, False) full3 = snap.GetBfsFullDiam(Graph1, 1000, False) print("Approximate full diameter by sampling ", 10, " nodes: ", full1) print("Approximate full diameter by sampling ", 100, " nodes: ", full2) print("Approximate full diameter by sampling ", 1000, " nodes: ", full3) fmean = (full1 + full2 + full3) / 3.0 fvar = (((full1 * full1) + (full2 * full2) + (full3 * full3)) / 3.0) - (fmean * fmean) print("Approximate full diameter (mean and variance): %0.4f,%0.4f" % (fmean, fvar)) eff1 = snap.GetBfsEffDiam(Graph1, 10, False) eff2 = snap.GetBfsEffDiam(Graph1, 100, False) eff3 = snap.GetBfsEffDiam(Graph1, 1000, False) print("Approximate effective diameter by sampling ", 10,
def get_diameter(Graph):
    """Return the BFS diameter estimate of ``Graph``.

    Uses 302 BFS start nodes and treats the graph as directed.
    """
    test_node_count = 302
    treat_as_directed = True
    return snap.GetBfsFullDiam(Graph, test_node_count, treat_as_directed)
print "size %d: count %d" % (p.GetVal1(), p.GetVal2()) # get degree distribution pairs (out-degree, count): snap.GetOutDegCnt(G9, CntV) for p in CntV: print "degree %d: count %d" % (p.GetVal1(), p.GetVal2()) # generate a Preferential Attachment graph on 100 nodes and out-degree of 3 G10 = snap.GenPrefAttach(100, 3) print "G10: Nodes %d, Edges %d" % (G10.GetNodes(), G10.GetEdges()) # define a vector of floats and get first eigenvector of graph adjacency matrix EigV = snap.TFltV() snap.GetEigVec(G10, EigV) nr = 0 for f in EigV: nr += 1 print "%d: %.6f" % (nr, f) # get an approximation of graph diameter diam = snap.GetBfsFullDiam(G10, 10) print "diam", diam # count the number of triads: triads = snap.GetTriads(G10) print "triads", triads # get the clustering coefficient cf = snap.GetClustCf(G10) print "cf", cf
def intro():
    """Walk through basic snap.py usage.

    Builds and generates several graphs, prints their sizes, round-trips a
    graph through the files ``test.graph`` (binary) and ``test.txt`` (edge
    list) in the current directory, and calls a few analysis routines. The
    results of the final analysis calls on G8 are not printed or returned.
    """
    # create a directed graph (TNGraph) by hand
    G1 = snap.TNGraph.New()
    G1.AddNode(1)
    G1.AddNode(5)
    G1.AddNode(32)
    G1.AddEdge(1, 5)
    G1.AddEdge(5, 1)
    G1.AddEdge(5, 32)
    print("G1: Nodes %d, Edges %d" % (G1.GetNodes(), G1.GetEdges()))

    # create a directed random graph on 100 nodes and 1k edges
    G2 = snap.GenRndGnm(snap.PNGraph, 100, 1000)
    print("G2: Nodes %d, Edges %d" % (G2.GetNodes(), G2.GetEdges()))
    # traverse the nodes
    for NI in G2.Nodes():
        print("node id %d with out-degree %d and in-degree %d" % (NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()))
    # traverse the edges
    for EI in G2.Edges():
        print("edge (%d, %d)" % (EI.GetSrcNId(), EI.GetDstNId()))
    # traverse the edges by nodes
    for NI in G2.Nodes():
        for Id in NI.GetOutEdges():
            print("edge (%d %d)" % (NI.GetId(), Id))

    # generate a network using Forest Fire model
    G3 = snap.GenForestFire(1000, 0.35, 0.35)
    print("G3: Nodes %d, Edges %d" % (G3.GetNodes(), G3.GetEdges()))

    # save and load binary; the flush must happen before the file is reopened
    FOut = snap.TFOut("test.graph")
    G3.Save(FOut)
    FOut.Flush()
    FIn = snap.TFIn("test.graph")
    G4 = snap.TNGraph.Load(FIn)
    print("G4: Nodes %d, Edges %d" % (G4.GetNodes(), G4.GetEdges()))

    # save and load from a text file
    snap.SaveEdgeList(G4, "test.txt", "Save as tab-separated list of edges")
    G5 = snap.LoadEdgeList(snap.PNGraph, "test.txt", 0, 1)
    print("G5: Nodes %d, Edges %d" % (G5.GetNodes(), G5.GetEdges()))

    # generate a network using Forest Fire model
    G6 = snap.GenForestFire(1000, 0.35, 0.35)
    print("G6: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))
    # convert to undirected graph
    G7 = snap.ConvertGraph(snap.PUNGraph, G6)
    print("G7: Nodes %d, Edges %d" % (G7.GetNodes(), G7.GetEdges()))
    # get largest weakly connected component of G6 (result not used further)
    WccG = snap.GetMxWcc(G6)
    # get a subgraph induced on nodes {0,1,2,3,4} (result not used further)
    SubG = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4))
    # get 3-core of G6 (result not used further)
    Core3 = snap.GetKCore(G6, 3)
    # delete nodes of out degree 10 and in degree 5 (modifies G6 in place,
    # hence the "G6a" label on the sizes printed next)
    snap.DelDegKNodes(G6, 10, 5)
    print("G6a: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))

    # generate a Preferential Attachment graph on 1000 nodes and node out degree of 3
    G8 = snap.GenPrefAttach(1000, 3)
    print("G8: Nodes %d, Edges %d" % (G8.GetNodes(), G8.GetEdges()))
    # vector of pairs of integers (size, count)
    CntV = snap.TIntPrV()
    # get distribution of connected components (component size, count)
    snap.GetWccSzCnt(G8, CntV)
    # get degree distribution pairs (degree, count); reuses the same vector
    snap.GetOutDegCnt(G8, CntV)
    # vector of floats
    EigV = snap.TFltV()
    # get first eigenvector of graph adjacency matrix
    snap.GetEigVec(G8, EigV)
    # get diameter of G8 (100 BFS start nodes); return value is discarded
    snap.GetBfsFullDiam(G8, 100)
    # count the number of triads in G8, get the clustering coefficient of G8;
    # both return values are discarded
    snap.GetTriads(G8)
    snap.GetClustCf(G8)
def graphStructure(elistName, elistPath):
    """
    Compute the graph properties required by the assignment.

    Besides filling the result dictionary, four plots are written through
    SNAP, named ``deg_dist_``, ``shortest_path_``, ``connected_comp_`` and
    ``clustering_coeff_`` followed by ``elistName``.

    Args:
        elistName (str) -> Input elist name
        elistPath (pathlib.Path) -> Input elist from which the undirected
            graph is loaded

    Return:
        RESULTS (dict) -> Dictionary containing results for different subparts of the assignment
    """
    graph = snap.LoadEdgeList(snap.PUNGraph, elistPath, 0, 1)
    results = {}

    # Part 1 (Size of the network)
    results['nodeCount'] = graph.GetNodes()
    results['edgeCount'] = graph.GetEdges()

    # Part 2 (Degree of nodes in the network)
    degrees = [(node.GetId(), node.GetDeg()) for node in graph.Nodes()]
    with_degree_7 = sum(1 for _, deg in degrees if deg == 7)
    top_degree = max((deg for _, deg in degrees), default=0)
    top_nodes = [node_id for node_id, deg in degrees if deg == top_degree]

    # Since it is an undirected graph, in/out degree is unimportant
    snap.PlotOutDegDistr(graph, f"deg_dist_{elistName}")

    results['maxDegree'] = top_degree
    results['maxDegreeNodes'] = ','.join(map(str, top_nodes))
    results['degree7Count'] = with_degree_7

    # Part 3 (Paths in the network)
    sample_sizes = (10, 100, 1000)

    # Full diameter for each sample size, then mean/variance of the three
    full = {size: snap.GetBfsFullDiam(graph, size, False)
            for size in sample_sizes}
    full_mean, full_variance = meanVariance(full.values())
    full['mean'] = full_mean
    full['variance'] = full_variance
    results['fullDiameters'] = full

    # Effective diameter, same scheme
    effective = {size: snap.GetBfsEffDiam(graph, size, False)
                 for size in sample_sizes}
    eff_mean, eff_variance = meanVariance(effective.values())
    effective['mean'] = eff_mean
    effective['variance'] = eff_variance
    results['effDiameters'] = effective

    snap.PlotShortPathDistr(graph, f"shortest_path_{elistName}")

    # Part 4 (Components of the network)
    bridges = snap.TIntPrV()
    cut_points = snap.TIntV()
    results['fractionLargestConnected'] = snap.GetMxSccSz(graph)
    snap.GetEdgeBridges(graph, bridges)
    snap.GetArtPoints(graph, cut_points)
    results['edgeBridges'] = len(bridges)
    results['articulationPoints'] = len(cut_points)

    snap.PlotSccDistr(graph, f"connected_comp_{elistName}")

    # Part 5 (Connectivity and clustering in the network)
    results['avgClusterCoefficient'] = snap.GetClustCf(graph, -1)
    results['triadCount'] = snap.GetTriadsAll(graph, -1)[0]

    # Two independent random draws, in this order, from the module-level Rnd
    first_random = graph.GetRndNId(Rnd)
    second_random = graph.GetRndNId(Rnd)
    results['randomClusterCoefficient'] = (
        first_random, snap.GetNodeClustCf(graph, first_random))
    results['randomNodeTriads'] = (
        second_random, snap.GetNodeTriads(graph, second_random))
    results['edgesTriads'] = snap.GetTriadEdges(graph)

    snap.PlotClustCf(graph, f"clustering_coeff_{elistName}")

    return results
def get_longest_shortest_path(G, num_start_nodes, directed=False):
    """Return SNAP's BFS full-diameter estimate for ``G``.

    ``num_start_nodes`` is the number of BFS start nodes and ``directed``
    selects whether edge direction is respected; both are passed through
    unchanged.
    """
    longest = snap.GetBfsFullDiam(G, num_start_nodes, directed)
    return longest
# Excerpt of a longer Python 2 script: remaining statistics of the original
# graph (`Graph`), then the start of the ER-graph section. `NumTriads` and
# `n` are set by code before this excerpt.
print "Number of triads: " + str(NumTriads)
# parameters used to generate the graph
print "Params to graph: Original Graph"
# average path length: per source node, the mean of all distances returned by
# GetShortPath; the per-node means are then averaged
alist = list()
i = 0
for node in Graph.Nodes():
    avg = 0.0
    ndh = snap.TIntH()
    snap.GetShortPath(Graph, node.GetId(), ndh)
    for item in ndh:
        avg = avg + ndh[item]
    # NOTE(review): divides by every entry in ndh; if the source node itself
    # is included (distance 0) the mean is slightly lowered -- confirm.
    alist.append(avg / len(ndh))
print "Avg path length", float(sum(alist)) / len(alist)
# diameter, using n BFS start nodes
diam = snap.GetBfsFullDiam(Graph, n, False)
print "diameter", diam
# average clustering coefficient
GraphClustCoeff = snap.GetClustCf(Graph, -1)
print "Avg Clustering Coefficient", GraphClustCoeff
# model specific metrics
# ER Graph
print ""
print "------ER Graph-------"
# nodes (n is rebound to the ER graph's node count from here on)
n = ERGraph.GetNodes()
print "Nodes:", n
# edges
print "Edges:", ERGraph.GetEdges()
# triads
from IPython.display import Image ## Data Types G = snap.LoadEdgeList(snap.PNGraph, "../../RepositoryData/data/cit-HepTh.txt") ## Get node degrees CntV = snap.TIntPrV() snap.GetOutDegCnt(G, CntV) for p in CntV: print("degree %d: count %d" % (p.GetVal1(), p.GetVal2())) print(snap.GetClustCf(G)) # clustering coefficient print(snap.GetTriads(G))# diameter print(snap.GetBfsFullDiam(G, 10)) ## Betweenness centrality Nodes = snap.TIntFltH() Edges = snap.TIntPrFltH() snap.GetBetweennessCentr(G, Nodes, Edges, 1.0) # for node in Nodes: # print("node: %d centrality: %f" % (node, Nodes[node])) # for edge in Edges: # print("edge: (%d, %d) centrality: %f" % (edge.GetVal1(), edge.GetVal2(), Edges[edge])) Graph = snap.GenRndGnm(snap.PNGraph, 10, 20) Nodes = snap.TIntFltH() Edges = snap.TIntPrFltH() snap.GetBetweennessCentr(Graph, Nodes, Edges, 1.0)
#WS Graph print "------WS Graph-------" #nodes n = WSGraph.GetNodes() print "Nodes:", n #edges print "Edges:", WSGraph.GetEdges() #triads NumTriads = snap.GetTriads(WSGraph, -1) print "Number of triads: " + str(NumTriads) #params to generate graph print "Params to graph: Mean node degree k", MeanDeg #avg path length alist = list() i = 0 for node in WSGraph.Nodes(): avg = 0.0 ndh = snap.TIntH() snap.GetShortPath(WSGraph, node.GetId(), ndh) for item in ndh: avg = avg + ndh[item] alist.append(avg / len(ndh)) print "Avg path length", float(sum(alist)) / len(alist) #diameter diam = snap.GetBfsFullDiam(WSGraph, n, False) print "diameter", diam #avg clus cf GraphClustCoeff = snap.GetClustCf(WSGraph, -1) print "Avg Clustering Coefficient", GraphClustCoeff
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    """Compute structural statistics for every ego network in ``dataset_dir``.

    Each file in ``dataset_dir`` is loaded as an edge list (directed when
    ``IsDir`` is True, undirected otherwise). Per ego network the node count,
    edge count, density, approximate diameter, closeness centralities,
    normalised betweenness centralities and degree counts are collected.
    Aggregates are produced with ``calc.calcular_full``, a degree histogram is
    drawn with ``histogram.histogram`` and the results are written as JSON.

    Nothing is computed when ``<output_dir><net>_net_struct.json`` already
    exists.

    :param dataset_dir: directory holding ``<ego_id>.edge_list`` files; it is
        concatenated directly with each file name.
    :param output_dir: prefix for all output paths.
    :param net: network name used in output file names.
    :param IsDir: True to treat the networks as directed.
    :param weight: unused by this function (kept for callers).
    """
    print("\n######################################################################\n")
    if os.path.isfile(str(output_dir) + str(net) + "_net_struct.json"):
        print("Arquivo já existe: " + str(output_dir) + str(net) + "_net_struct.json")
    else:
        print("Dataset network structure - " + str(dataset_dir))
        n = []  # node count per ego network
        e = []  # edge count per ego network
        nodes = {}  # ego_id -> number of nodes
        edges = {}  # ego_id -> number of edges
        d = []  # diameter per ego network
        diameter = {}  # ego_id -> diameter
        dens = []  # density per ego network
        density = {}  # ego_id -> density
        cc = []  # closeness centrality values (one per node, all egos pooled)
        bc_n = []  # node betweenness centrality values (all egos pooled)
        bc_e = []  # edge betweenness centrality values (all egos pooled)
        degree = {}  # degree -> number of nodes with that degree
        i = 0

        for file in os.listdir(dataset_dir):
            # int() also handles ids beyond the machine word on Python 2,
            # where it promotes to long automatically.
            ego_id = int(file.split(".edge_list")[0])
            i += 1
            print(str(output_dir) + str(net) + " - Calculando propriedades para o ego " + str(i) + ": " + str(file))

            # Load from a text file; a separator may be required, e.g.
            # snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
            if IsDir is True:
                G = snap.LoadEdgeList(snap.PNGraph, dataset_dir + file, 0, 1)
            else:
                G = snap.LoadEdgeList(snap.PUNGraph, dataset_dir + file, 0, 1)

            n_nodes = G.GetNodes()
            n_edges = G.GetEdges()
            nodes[ego_id] = n_nodes
            edges[ego_id] = n_edges
            n.append(n_nodes)
            e.append(n_edges)

            if n_edges == 0:
                # No edges: every metric is recorded as 0.
                a = 0
                d.append(a)
                cc.append(a)
                bc_n.append(a)
                bc_e.append(a)
                dens.append(a)
                density[ego_id] = a
                diameter[ego_id] = a
            else:
                # Network density
                # NOTE(review): the same E / (N * (N - 1)) formula is used for
                # directed and undirected graphs -- confirm this is intended.
                w = float(n_edges) / (float(n_nodes) * (float(n_nodes) - 1))
                dens.append(w)
                density[ego_id] = w

                # Approximate diameter from 100 BFS start nodes
                z = snap.GetBfsFullDiam(G, 100, IsDir)
                d.append(z)
                diameter[ego_id] = z

                # Closeness centrality of every node
                Normalized = True
                for NI in G.Nodes():
                    cc.append(snap.GetClosenessCentr(G, NI.GetId(), Normalized, IsDir))

                # Betweenness centrality of nodes and edges
                if n_nodes < 3:
                    # Fixed: with fewer than 3 nodes there is no intermediate
                    # node on any path, so betweenness is 0. The old code
                    # stored n_edges here.
                    bc_n.append(0)
                    bc_e.append(0)
                else:
                    Nodes = snap.TIntFltH()
                    Edges = snap.TIntPrFltH()
                    snap.GetBetweennessCentr(G, Nodes, Edges, 1.0, IsDir)
                    # Normalise by the maximum possible betweenness.
                    if IsDir is True:
                        max_betweenneess = (n_nodes - 1) * (n_nodes - 2)
                    else:
                        max_betweenneess = ((n_nodes - 1) * (n_nodes - 2)) / 2
                    for node in Nodes:
                        bc_n.append(float(Nodes[node]) / float(max_betweenneess))
                    for edge in Edges:
                        bc_e.append(float(Edges[edge]) / float(max_betweenneess))

                # Degree counts, accumulated across all ego networks
                DegToCntV = snap.TIntPrV()
                snap.GetDegCnt(G, DegToCntV)
                for item in DegToCntV:
                    k = item.GetVal1()
                    v = item.GetVal2()
                    degree[k] = degree.get(k, 0) + v

            # NOTE(review): cc, bc_n and bc_e hold several values per ego, so
            # index i - 1 does not line up with the current ego for them.
            print(" ".join(str(value) for value in (
                n[i - 1], e[i - 1], dens[i - 1], d[i - 1],
                cc[i - 1], bc_n[i - 1], bc_e[i - 1])))
            print("")

        N = calc.calcular_full(n)
        E = calc.calcular_full(e)

        histogram.histogram(degree, output_dir + "histogram" + "/", N['soma'], net)

        DENS = calc.calcular_full(dens)
        D = calc.calcular_full(d)
        CC = calc.calcular_full(cc)
        BC_N = calc.calcular_full(bc_n)
        BC_E = calc.calcular_full(bc_e)

        overview = {}
        overview['Nodes'] = N
        overview['Edges'] = E
        overview['Density'] = DENS
        overview['Diameter'] = D
        overview['CloseCentr'] = CC
        overview['BetweennessCentrNodes'] = BC_N
        overview['BetweennessCentrEdges'] = BC_E

        nodes_stats = calc.calcular_full(n)
        edges_stats = calc.calcular_full(e)
        overview_basics = {
            'nodes': n,
            'nodes_stats': nodes_stats,
            'edges': e,
            'edges_stats': edges_stats
        }

        output_basics = output_dir + "/" + str(net) + "/"
        if not os.path.exists(output_basics):
            os.makedirs(output_basics)

        # Per-ego dictionaries and the basic overview go into output_basics.
        per_network_outputs = (
            ("_nodes.json", nodes),
            ("_edges.json", edges),
            ("_density.json", density),
            ("_diameter.json", diameter),
            ("_overview.json", overview_basics),
        )
        for suffix, payload in per_network_outputs:
            with open(str(output_basics) + str(net) + suffix, 'w') as f:
                f.write(json.dumps(payload))

        # Written last: its existence is the "already done" marker checked at
        # the top of this function.
        with open(str(output_dir) + str(net) + "_net_struct.json", 'w') as f:
            f.write(json.dumps(overview))
def calc_metric(G, metric):
    """Return the value of one named metric for the SNAP graph ``G``.

    Supported names: ``nodes``, ``edges``, ``size``, ``avg_degree``,
    ``diameter``, ``density``, ``closeness_centr``,
    ``betweenness_centr_nodes``, ``betweenness_centr_edges`` and
    ``clust_coef``. Centrality metrics return the ``'media'`` entry of
    ``calc.calcular`` over the per-node (or per-edge) values.

    For any other name a message is printed and the process exits through
    ``sys.exit()``.
    """
    IsDir = True  # all networks are directed
    n_nodes = G.GetNodes()
    n_edges = G.GetEdges()

    if metric == "nodes":
        return n_nodes
    if metric == "edges":
        return n_edges
    if metric == "size":
        return n_nodes + n_edges
    if metric == "avg_degree":
        return float(2 * n_edges) / float(n_nodes)
    if metric == "diameter":
        return snap.GetBfsFullDiam(G, 100, IsDir)
    if metric == "density":
        return float(n_edges) / (float(n_nodes) * (float(n_nodes - 1)))

    if metric == "closeness_centr":
        Normalized = True
        closeness = [
            snap.GetClosenessCentr(G, NI.GetId(), Normalized, IsDir)
            for NI in G.Nodes()
        ]
        return calc.calcular(closeness)['media']

    if metric in ("betweenness_centr_nodes", "betweenness_centr_edges"):
        values = []
        if n_edges == 0 or n_nodes < 3:
            # Too small for any betweenness: a single zero is averaged.
            values.append(int(0))
        else:
            Nodes = snap.TIntFltH()
            Edges = snap.TIntPrFltH()
            snap.GetBetweennessCentr(G, Nodes, Edges, 1.0, IsDir)
            # Normalise by the maximum possible betweenness.
            if IsDir is True:
                max_betweenneess = (n_nodes - 1) * (n_nodes - 2)
            else:
                max_betweenneess = ((n_nodes - 1) * (n_nodes - 2)) / 2
            scores = Nodes if metric == "betweenness_centr_nodes" else Edges
            for key in scores:
                values.append(float(scores[key]) / float(max_betweenneess))
        return calc.calcular(values)['media']

    if metric == "clust_coef":
        return snap.GetClustCf(G, -1)

    # Unknown metric: report it and terminate.
    print("\nImpossível calcular " + str(metric))
    print("\n")
    sys.exit()
    return None
import snap
import sys

# Directed graph from the tab-separated edge list named by the first
# command-line argument.
G5 = snap.LoadEdgeList(snap.PNGraph, sys.argv[1], 0, 1, '\t')

# Count the nodes by walking the node iterator.
count = 0
for v in G5.Nodes():
    count += 1
print(count)

# Diameter estimate (100 BFS start nodes) and the largest weakly connected
# component; the component object is printed before the diameter.
diam = snap.GetBfsFullDiam(G5, 100)
WccG = snap.GetMxWcc(G5)
print(WccG)
print(diam)
# An effective-diameter call (GetBfsEffDiam) was left disabled here.
max_deg_fb = NI.GetDeg() for NI in fbsgel.Nodes(): if (NI.GetDeg() == max_deg_fb): MaxDegVfb.append(NI.GetId()) MaxDegNodeString = ','.join(map(str, MaxDegVfb)) print("Node id(s) with highest degree:", MaxDegNodeString) #c snap.PlotOutDegDistr(fbsgel, "deg_dist_" + str(subgraph_name), "deg_dist_" + str(subgraph_name)) #Q3 #a i = 10 average = 0.0 variance = 0.0 while (i <= 1000): diam = snap.GetBfsFullDiam(fbsgel, i, False) print("Approximate full diameter by sampling", i, "nodes:", round(diam, 4)) i *= 10 average += diam variance += (diam * diam) average /= 3 variance = (variance / 3) - average * average print("Approximate full diameter(mean and variance): %0.4f,%0.4f" % (average, variance)) #b i = 10 average = 0.0 variance = 0.0 while (i <= 1000): diam = snap.GetBfsEffDiam(fbsgel, i, False) print("Approximate effective diameter by sampling", i, "nodes:",
def main():
    """Print statistics of the subgraph named by ``sys.argv[1]`` and plot it.

    The edge list is read from ``./subgraphs/<sys.argv[1]>`` as an undirected
    graph. Five groups of statistics are printed (size, degrees, diameters,
    components, clustering), then four plots are generated inside ``./plots``
    and renamed to descriptive file names. The working directory is changed
    temporarily and restored to the starting directory at the end.
    """
    parentDir = os.getcwd()
    # Load relative to ./subgraphs, then return to the starting directory.
    os.chdir(parentDir + "/subgraphs")
    sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    # Name without extension: the part before the first '.'.
    subGraphName = sys.argv[1].split(".")[0]
    os.chdir(parentDir)

    #### 1 ########
    # Size of the network; nodes are counted by iterating.
    node_count = 0
    for node in sub_graph.Nodes():
        node_count = node_count + 1
    printWithOutNewLine("Number of nodes:", node_count)
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    # Degrees: look up the degree of one maximum-degree node, then collect
    # every node that shares that degree.
    printWithOutNewLine("Number of nodes with degree=7:", snap.CntDegNodes(sub_graph, 7))
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = 0
    for pair in nodeDegPairs:
        if (pair.GetVal1() == rndMaxDegNId):
            maxDegVal = pair.GetVal2()
            break
    maxDegNodes = []
    for pair in nodeDegPairs:
        if (pair.GetVal2() == maxDegVal):
            maxDegNodes.append(pair.GetVal1())
    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    # Full diameter for 10/100/1000 BFS start nodes, plus mean and variance.
    # NOTE(review): statistics.variance is the sample variance (n - 1
    # denominator) -- confirm that is the variance wanted here.
    sampledFullDiam = []
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 10, False))
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 100, False))
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 1000, False))
    sampledFullDiamStats = []
    sampledFullDiamStats.append(round(statistics.mean(sampledFullDiam), 4))
    sampledFullDiamStats.append(round(statistics.variance(sampledFullDiam), 4))
    printWithOutNewLine("Approximate full diameter by sampling 10 nodes:", sampledFullDiam[0])
    printWithOutNewLine("Approximate full diameter by sampling 100 nodes:", sampledFullDiam[1])
    printWithOutNewLine("Approximate full diameter by sampling 1000 nodes:", sampledFullDiam[2])
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    # Effective diameter, same scheme; values are rounded before the
    # statistics are computed.
    sampledEffDiam = []
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 10, False), 4))
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 100, False), 4))
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 1000, False), 4))
    sampledEffDiamStats = []
    sampledEffDiamStats.append(round(statistics.mean(sampledEffDiam), 4))
    sampledEffDiamStats.append(round(statistics.variance(sampledEffDiam), 4))
    printWithOutNewLine("Approximate effective diameter by sampling 10 nodes:", sampledEffDiam[0])
    printWithOutNewLine("Approximate effective diameter by sampling 100 nodes:", sampledEffDiam[1])
    printWithOutNewLine("Approximate effective diameter by sampling 1000 nodes:", sampledEffDiam[2])
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    # Components: largest component fraction, bridges, articulation points.
    printWithOutNewLine("Fraction of nodes in largest connected component:", round(snap.GetMxSccSz(sub_graph), 4))
    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:", len(articulationPoints))

    #### 5 ########
    # Clustering and triads; the same random node is used for both the
    # clustering coefficient and the triad count.
    printWithOutNewLine("Average clustering coefficient:", round(snap.GetClustCf(sub_graph, -1), 4))
    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)
    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))
    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))
    printWithOutNewLine("Number of edges that participate in at least one triad:", snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    # Plots are generated with ./plots as the working directory.
    if not os.path.isdir('plots'):
        os.makedirs('plots')
    os.chdir(parentDir + "/plots")
    plotsDir = os.getcwd()
    snap.PlotOutDegDistr(sub_graph, subGraphName, subGraphName + " Subgraph Degree Distribution")
    snap.PlotShortPathDistr(sub_graph, subGraphName, subGraphName + " Subgraph Shortest Path Lengths Distribution")
    snap.PlotSccDistr(sub_graph, subGraphName, subGraphName + " Subgraph Connected Components Size Distribution")
    snap.PlotClustCf(sub_graph, subGraphName, subGraphName + " Subgraph Clustering Coefficient Distribution")

    # Delete every file in ./plots that is not a PNG.
    files = os.listdir(plotsDir)
    for file in files:
        if not file.endswith(".png"):
            os.remove(os.path.join(plotsDir, file))

    # Rename "<kind>.<name>.png" to "<descriptive prefix><name>.png"; files
    # with exactly two dot-separated parts are left untouched.
    # NOTE(review): filePrefix is not reset per file, so a file whose first
    # part matches none of the kinds reuses the previous file's prefix.
    plots = os.listdir(plotsDir)
    filePrefix = "filename"
    for file in plots:
        nameSplit = file.split(".")
        if (len(nameSplit) == 2):
            continue
        if (nameSplit[0] == "ccf"):
            filePrefix = "clustering_coeff_"
        elif (nameSplit[0] == "outDeg"):
            filePrefix = "deg_dist_"
        elif (nameSplit[0] == "diam"):
            filePrefix = "shortest_path_"
        elif (nameSplit[0] == "scc"):
            filePrefix = "connected_comp_"
        os.rename(file, filePrefix + nameSplit[1] + "." + nameSplit[2])
    os.chdir(parentDir)