def test_snap(self):
    """Smoke-test the snap.py installation.

    Builds a full graph, a star graph and a random G(n, m) graph, then
    checks degree counts, iteration over a ``TIntPrV`` and that rewiring
    keeps every node's total degree.
    """
    import snap

    node_count = 20
    expected_degree = node_count - 1

    # Three undirected graphs with the same number of nodes.
    complete = snap.GenFull(snap.PUNGraph, node_count)
    star = snap.GenStar(snap.PUNGraph, node_count)
    rnd = snap.GenRndGnm(snap.PUNGraph, node_count, node_count * 3)

    # Every node of the full graph is expected to have degree node_count - 1.
    self.assertEqual(snap.CntInDegNodes(complete, expected_degree), node_count)
    self.assertEqual(snap.CntOutDegNodes(complete, expected_degree), node_count)
    # The highest-degree node of the star is expected to have id 0.
    self.assertEqual(snap.GetMxInDegNId(star), 0)
    self.assertEqual(snap.GetMxOutDegNId(star), 0)

    # Iterating the histogram: each entry must be (node_count - 1, node_count).
    histogram = snap.TIntPrV()
    snap.GetInDegCnt(complete, histogram)
    for pair in histogram:
        self.assertEqual(expected_degree, pair.GetVal1())
        self.assertEqual(node_count, pair.GetVal2())

    # Rewiring: nodes with the same id must keep the same in + out degree.
    rewired = snap.GenRewire(rnd)
    for before in rnd.Nodes():
        for after in rewired.Nodes():
            if before.GetId() != after.GetId():
                continue
            self.assertEqual(before.GetOutDeg() + before.GetInDeg(),
                             after.GetOutDeg() + after.GetInDeg())
def plotDegreeDist(Graph, title, c, x_u, y_d):
    """Scatter-plot the in-degree distribution of ``Graph`` on log-log axes.

    Args:
        Graph: snap graph handed to ``snap.GetInDegCnt``.
        title: Title of the plot.
        c: Marker colour passed to ``plt.scatter``.
        x_u: Upper limit of the x axis (the lower limit is fixed at 1).
        y_d: Lower limit of the y axis (the upper limit is fixed at 0.1).

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    distribution = snap.TIntPrV()
    snap.GetInDegCnt(Graph, distribution)
    nodes = Graph.GetNodes()

    # Each entry is a (degree, node count) pair; counts become fractions.
    X = [d.GetVal1() for d in distribution]
    Y = [float(d.GetVal2()) / nodes for d in distribution]

    plt.scatter(X, Y, marker='.', color=c)
    plt.xlabel("degree")
    plt.ylabel("fraction of nodes")
    plt.title(title)
    plt.xscale('log')
    plt.yscale('log')
    plt.xlim(1, x_u)
    plt.ylim(y_d, 0.1)
    plt.show()
def avgDegreeDist(family, direction, numSamples, apiGraph):
    """Average the degree histogram over a random sample of stored graphs.

    Args:
        family: Sub-directory of ``data/graphs/`` holding the graph files.
        direction: ``'in'`` for in-degree; any other value selects out-degree.
        numSamples: Maximum number of graph files to sample.
        apiGraph: If true, load ``.apigraph`` files with ``TNEANet.Load``;
            otherwise load ``.edges`` files with ``LoadEdgeList``.

    Returns:
        A NumPy array where entry ``d`` is the number of nodes with degree
        ``d``, averaged over the graphs that were actually loaded.

    Raises:
        ValueError: If no matching graph file is available.
    """
    path = 'data/graphs/' + family + '/'
    suffix = '.apigraph' if apiGraph else '.edges'

    # Build a real list: on Python 3 ``filter`` returns an iterator, which
    # ``random.shuffle`` and slicing cannot handle.
    graph_files = [name for name in os.listdir(path) if suffix in name]
    random.shuffle(graph_files)
    sampled = graph_files[:numSamples]
    if not sampled:
        raise ValueError("no '%s' graph files to sample in %s" % (suffix, path))

    if apiGraph:
        Gs = [snap.TNEANet.Load(snap.TFIn(path + f)) for f in sampled]
    else:
        Gs = [snap.LoadEdgeList(snap.PNEANet, path + f, 0, 1) for f in sampled]

    use_in_degree = direction == 'in'
    if use_in_degree:
        maxdeg = max(G.GetNI(snap.GetMxInDegNId(G)).GetInDeg() for G in Gs)
    else:
        maxdeg = max(G.GetNI(snap.GetMxOutDegNId(G)).GetOutDeg() for G in Gs)

    avg_deg_dist = np.zeros(maxdeg + 1)
    for G in Gs:
        DegToCntV = snap.TIntPrV()
        if use_in_degree:
            snap.GetInDegCnt(G, DegToCntV)
        else:
            snap.GetOutDegCnt(G, DegToCntV)
        for item in DegToCntV:
            avg_deg_dist[item.GetVal1()] += item.GetVal2()

    # Divide by the number of graphs actually loaded; the directory may hold
    # fewer files than ``numSamples``.
    return avg_deg_dist / len(Gs)
def get_in_dists(G):
    """Return the in-degree histogram of ``G`` with its pdf, cdf and ccdf.

    Args:
        G: snap graph handed to ``snap.GetInDegCnt``.

    Returns:
        A tuple ``(deg_counts, degs, cdf, ccdf, pdf)`` of NumPy arrays.
        ``deg_counts[i]`` is the number of nodes with in-degree ``degs[i]``
        and ``pdf`` is ``deg_counts`` normalised to sum to 1. ``cdf`` is the
        cumulative sum of ``pdf`` with a leading 0, so ``cdf`` and ``ccdf``
        (``1 - cdf``) have one more element than the other arrays.
        An empty histogram yields empty arrays instead of raising.
    """
    deg_vect = snap.TIntPrV()
    snap.GetInDegCnt(G, deg_vect)
    pairs = [(item.GetVal1(), item.GetVal2()) for item in deg_vect]

    degs = np.asarray([deg for deg, _ in pairs], dtype=int)
    deg_counts = np.asarray([cnt for _, cnt in pairs], dtype=int)

    total = deg_counts.sum()
    if total:
        pdf = deg_counts.astype(float) / total
    else:
        # Nothing to normalise; avoid dividing by zero.
        pdf = np.zeros(len(pairs))

    cdf = np.insert(np.cumsum(pdf), 0, 0)
    ccdf = 1 - cdf
    return deg_counts, degs, cdf, ccdf, pdf
def f():
    """Return the in-degree histogram of ``self.graph`` as a list of tuples.

    Each tuple is ``(GetVal1(), GetVal2())`` of one entry that
    ``GetInDegCnt`` wrote into a ``TFltPr64V`` vector.
    """
    snap_module = self.snap
    histogram = snap_module.TFltPr64V()
    snap_module.GetInDegCnt(self.graph, histogram)
    return [(entry.GetVal1(), entry.GetVal2()) for entry in histogram]
def get_in_degree_distribution(Graph):
    """Return the in-degree distribution of ``Graph`` as two parallel lists.

    Args:
        Graph: snap graph handed to ``snap.GetInDegCnt``.

    Returns:
        ``(XI, YI)`` where ``XI`` holds the in-degrees and ``YI`` the fraction
        of all nodes having that in-degree. Entries whose degree or count is
        zero are skipped.
    """
    # Use a fresh local vector; the function must not rely on a vector
    # defined somewhere outside of it.
    DegToCntV = snap.TIntPrV()
    snap.GetInDegCnt(Graph, DegToCntV)
    num_node = Graph.GetNodes()

    XI, YI = [], []
    for item in DegToCntV:
        degree, count = item.GetVal1(), item.GetVal2()
        if degree == 0 or count == 0:
            continue
        XI.append(degree)
        YI.append(count * 1.0 / num_node)
    return XI, YI
def getMean(network):
    """Return the mean in-degree of ``network`` as a float.

    The in-degree histogram holds (degree, node count) pairs, so each degree
    is weighted by the number of nodes that have it.

    Args:
        network: snap graph handed to ``snap.GetInDegCnt``.

    Returns:
        Sum of ``degree * count`` divided by the sum of ``count``, or ``0.0``
        when the histogram is empty.
    """
    DegToCntV = snap.TIntPrV()
    snap.GetInDegCnt(network, DegToCntV)

    total_nodes = 0
    total_degree = 0
    for item in DegToCntV:
        count = item.GetVal2()
        total_nodes += count
        # Weight the degree by how many nodes have it.
        total_degree += item.GetVal1() * count

    if total_nodes == 0:
        return 0.0
    # float() keeps the division exact under Python 2 as well.
    return float(total_degree) / total_nodes
def plotDegreeDist(Graph):
    """Add the in-degree distribution of ``Graph`` to the current log-log plot.

    The x values are in-degrees and the y values are the fraction of nodes
    with that in-degree. The curve is drawn in yellow and labelled
    'Customer - Product Graph'; showing the figure is left to the caller.
    """
    histogram = snap.TIntPrV()
    snap.GetInDegCnt(Graph, histogram)
    node_count = Graph.GetNodes()

    degrees = [entry.GetVal1() for entry in histogram]
    fractions = [float(entry.GetVal2()) / node_count for entry in histogram]
    plt.loglog(degrees, fractions, color='y', label='Customer - Product Graph')
def getStandartDeviation(network, mean):
    """Return the average absolute deviation of the in-degree from ``mean``.

    Each (degree, node count) entry of the in-degree histogram contributes
    ``abs(degree - mean)`` once per node, so the result is an average over
    nodes rather than over distinct degree values.

    NOTE(review): despite the name this is the mean absolute deviation, not
    the root-mean-square standard deviation -- confirm which one callers want.

    Args:
        network: snap graph handed to ``snap.GetInDegCnt``.
        mean: Reference value the deviations are measured from.

    Returns:
        The deviation as a float, or ``0.0`` when the histogram is empty.
    """
    DegToCntV = snap.TIntPrV()
    snap.GetInDegCnt(network, DegToCntV)

    total_nodes = 0
    total_deviation = 0.0
    for item in DegToCntV:
        count = item.GetVal2()
        total_nodes += count
        # Weight by the node count so numerator and denominator agree.
        total_deviation += abs(item.GetVal1() - mean) * count

    if total_nodes == 0:
        return 0.0
    return total_deviation / total_nodes
def getDataPointsToPlot(Graph, degType):
    """Return the degree distribution of ``Graph`` as plot-ready sequences.

    Args:
        Graph: snap graph to analyse.
        degType: ``"In"``, ``"Out"`` or ``"Total"``.

    Returns:
        ``(X, Y)`` tuples sorted by degree, where ``Y[i]`` is the fraction of
        nodes with degree ``X[i]``. Both are empty when the histogram is empty.

    Raises:
        ValueError: If ``degType`` is not one of the three accepted values.
    """
    DegToCntV = snap.TIntPrV()
    if degType == "In":
        snap.GetInDegCnt(Graph, DegToCntV)
    elif degType == "Out":
        snap.GetOutDegCnt(Graph, DegToCntV)
    elif degType == "Total":
        snap.GetDegCnt(Graph, DegToCntV)
    else:
        raise ValueError("Invalid degree type: please use 'In', 'Out' or 'Total'.")

    NumNodes = Graph.GetNodes()
    DegToFrqV = {item.GetVal1(): float(item.GetVal2()) / NumNodes
                 for item in DegToCntV}
    if not DegToFrqV:
        # zip(*[]) yields nothing, which cannot be unpacked into X, Y.
        return (), ()
    X, Y = zip(*sorted(DegToFrqV.items()))
    return X, Y


def plot_graph(name):
    """Load the graph called ``name`` and plot its in, out and total degree
    distributions on one log-log figure."""
    G = load_graph(name)
    # Single-argument print calls behave the same on Python 2 and 3.
    print("{} graph nodes: {}".format(name, G.GetNodes()))
    print("{} graph edges: {}".format(name, G.GetEdges()))

    x_in, y_in = getDataPointsToPlot(G, 'In')
    plt.loglog(x_in, y_in, marker=',', color='y', label='In Degree')

    # The original bound the result to ``yout`` but plotted ``y_out``.
    x_out, y_out = getDataPointsToPlot(G, 'Out')
    plt.loglog(x_out, y_out, marker=',', color='r', label='Out Degree')

    x_total, y_total = getDataPointsToPlot(G, 'Total')
    plt.loglog(x_total, y_total, marker=',', color='b', label='Total Degree')

    plt.xlabel('Node Degree (log)')
    plt.ylabel('Proportion of Nodes with a Given Degree (log)')
    plt.title('Degree Distribution of In, Out, and Total degree for {} network'.format(name))
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # Plot distribution graphs for RT, MT, RE, Social networks
    plot_graph("retweet")
    plot_graph("mention")
    plot_graph("reply")
    plot_graph("social")
def Get_In_Degree_Distribution(G):
    # Collects the in-degree histogram of G into two NumPy arrays: `degree`
    # (the in-degree values) and `count` (the matching node counts).
    # NOTE(review): the rest of this function lies beyond the visible
    # excerpt (an unclosed ''' follows), so its return value is unknown here.
    Deg_dist = snap.TIntPrV()
    snap.GetInDegCnt(G, Deg_dist)
    # Both arrays start out empty and are extended with np.append below.
    degree = np.empty((1, 0))
    count = np.empty((1, 0))
    for node_degree_pr in Deg_dist:
        # Only entries with a strictly positive degree are kept.
        if node_degree_pr.GetVal1() > 0:
            degree = np.append(degree, node_degree_pr.GetVal1())
            count = np.append(count, node_degree_pr.GetVal2())
    '''
def getInDegDistr(G, outdeg):
    """Return the degree distribution of ``G`` sorted by ascending degree.

    Args:
        G: snap graph to analyse.
        outdeg: If truthy the out-degree histogram is used, otherwise the
            in-degree histogram.

    Returns:
        ``(degrees, counts)``: the degrees and, for each one, the proportion
        of all nodes having that degree.
    """
    histogram = snap.TIntPrV()
    fill = snap.GetOutDegCnt if outdeg else snap.GetInDegCnt
    fill(G, histogram)

    ordered = [(entry.GetVal1(), entry.GetVal2()) for entry in histogram]
    ordered.sort(key=lambda entry: entry[0])

    degrees = [degree for degree, _ in ordered]
    # Normalise by the total node count to turn counts into proportions.
    counts = [1.0 * node_count / G.GetNodes() for _, node_count in ordered]
    return (degrees, counts)
def testPowerLaw(network, k, a, c):
    """Compare the observed and the power-law log-fraction of nodes with
    in-degree ``k``.

    Args:
        network: snap graph handed to ``snap.GetInDegCnt``.
        k: the number of citations (the in-degree to test).
        a: the constant of the power law.
        c: the exponent of the power law.

    Returns:
        A string ``"<observed> = <predicted>"`` with three decimals each,
        where observed is ``log10(nodes with in-degree k / all nodes)`` and
        predicted is ``log10(a) - c * log10(k)``.

    Raises:
        ValueError: If no node has in-degree ``k``, because the logarithm of
            a zero fraction is undefined.
    """
    DegToCntV = snap.TIntPrV()
    snap.GetInDegCnt(network, DegToCntV)

    total_nodes = 0
    actual = 0
    for item in DegToCntV:
        total_nodes += item.GetVal2()
        if item.GetVal1() == k:
            actual = item.GetVal2()

    if actual == 0:
        raise ValueError("no node has in-degree %s; log10 of a zero fraction "
                         "is undefined" % k)

    # float() prevents the fraction from truncating to 0 under Python 2.
    observed = log10(float(actual) / total_nodes)
    predicted = log10(a) - c * log10(k)
    return "%.3f = %.3f" % (observed, predicted)
def get_deg_dist(g):
    """Return the out- and in-degree histograms of graph ``g`` as one frame.

    The result has columns ``deg`` and ``cnt`` plus a ``type`` column that is
    ``'out_deg'`` for the out-degree rows and ``'in_deg'`` for the in-degree
    rows. Out-degree rows come first.
    """
    frames = []
    for fill, label in ((snap.GetOutDegCnt, 'out_deg'),
                        (snap.GetInDegCnt, 'in_deg')):
        CntV = snap.TIntPrV()
        fill(g, CntV)
        rows = [(p.GetVal1(), p.GetVal2()) for p in CntV]
        frame = pd.DataFrame(rows, columns=["deg", "cnt"])
        frame['type'] = label
        frames.append(frame)
    return pd.concat(frames)
def plot_degree_distribution(G, chords_dict, genre):
    """Scatter the in-degree histogram of ``G`` on log-log axes.

    Args:
        G: snap graph; its node ids are looked up with the keys of
            ``chords_dict``.
        chords_dict: Mapping whose sorted keys are used as node ids and
            whose values serve as labels.
        genre: Legend label for the scatter series.

    Only strictly positive in-degrees are plotted. The figure is neither
    shown nor saved here.
    """
    histogram = snap.TIntPrV()
    snap.GetInDegCnt(G, histogram)
    positive = [entry for entry in histogram if entry.GetVal1() > 0]
    in_degrees = [entry.GetVal1() for entry in positive]
    num_nodes = [entry.GetVal2() for entry in positive]

    # Per-node in-degrees, in ascending key order of chords_dict.
    degs = np.array([G.GetNI(node_id).GetInDeg()
                     for node_id in sorted(chords_dict)])

    # Top-LIMIT labels and degrees; they only feed a bar chart that is
    # currently disabled.
    LIMIT = 10
    ranking = np.argsort(degs)[::-1]
    x_labels = [chords_dict[x] for x in ranking][:LIMIT]
    y = np.sort(degs)[::-1][:LIMIT]

    plt.scatter(in_degrees, num_nodes, label=genre)
    plt.xscale('log')
    plt.yscale('log')
    plt.gca().set_xlim([min(in_degrees), max(in_degrees)])
    plt.ylabel('Number of nodes')
    plt.xlabel('In-degree')
    plt.legend()
def degreeDistribution(graph):
    """Plot the total-degree distribution of ``graph`` with two power-law fits.

    Prints the extreme in-, out- and total degrees, fits a power law by least
    squares on the log-log histogram (degrees below ``10**2.5`` only) and by
    maximum likelihood, then shows all three series on one log-log figure.

    Degree-0 entries stay in the plotted histogram but are excluded from both
    fits, because the fits take the logarithm of the degree.

    Args:
        graph: snap graph to analyse.

    Raises:
        ValueError: If the graph has no node with a positive degree (or no
            nodes at all).
    """
    numNodes = float(graph.GetNodes())

    # In- and out-degree: only the extremes are reported.
    for label, fill in (('in', snap.GetInDegCnt), ('out', snap.GetOutDegCnt)):
        DegToCntV = snap.TIntPrV()
        fill(graph, DegToCntV)
        degrees = [item.GetVal1() for item in DegToCntV]
        # Single-argument print calls behave the same on Python 2 and 3.
        print('max %s degree: %s' % (label, max(degrees)))
        print('min %s degree: %s' % (label, min(degrees)))

    # Total-degree histogram as (degree, node count) pairs.
    DegToCntV = snap.TIntPrV()
    snap.GetDegCnt(graph, DegToCntV)
    histogram = [(item.GetVal1(), item.GetVal2()) for item in DegToCntV]
    x = [deg for deg, _ in histogram]
    y = [cnt / numNodes for _, cnt in histogram]

    # log(0) is undefined, so the fits only see positive degrees.
    positive = [(deg, cnt) for deg, cnt in histogram if deg > 0]
    if not positive:
        raise ValueError('graph has no node with a positive degree')

    # Lower cut-off of the power law, half a step below the smallest degree.
    xMin = min(deg for deg, _ in positive) - 0.5
    print('max total degree: %s' % max(x))
    print('min total degree: %s' % xMin)

    # Least-squares fit on the log-log histogram, outliers pruned.
    outLimit = 10**2.5
    fitPoints = [(deg, cnt / numNodes) for deg, cnt in positive if deg < outLimit]
    logDeg = [math.log10(float(deg)) for deg, _ in fitPoints]
    logFrq = [math.log10(float(frq)) for _, frq in fitPoints]
    fit = np.polyfit(logDeg, logFrq, deg=1)
    print('a: ' + str(fit[0]) + ', b: ' + str(fit[1]))
    x1 = np.linspace(1, 10**4, len(x))
    y1 = [i**fit[0] * 10**fit[1] for i in x1]

    # Maximum-likelihood exponent: every degree is counted once per node.
    # Counts are used directly instead of proportion * node total, and the
    # pairs are walked directly instead of via a list.index() per degree.
    numSamples = sum(cnt for _, cnt in positive)
    alphaMLLE = 1 + (numSamples / sum(np.log(deg / xMin) * cnt
                                      for deg, cnt in positive))
    print(alphaMLLE)
    x2 = np.linspace(1, 10**4, len(x))
    y2 = [((alphaMLLE - 1) / xMin) * ((i / xMin)**(-1 * alphaMLLE)) for i in x2]

    # Same estimator without the xMin cut-off; printed for comparison only.
    dSum = sum(np.log(deg) * cnt for deg, cnt in positive)
    mlle = 1 + numSamples / float(dSum)
    print(mlle)

    plt.loglog(x, y, color='blue', ls='None', marker='.', label='Degree Distribution')
    plt.loglog(x1, y1, color='red', ls='solid', marker='None', label='Least Squares Estimate')
    plt.loglog(x2, y2, color='green', ls='solid', marker='None', label='Max Log-Likelihood Estimate')
    plt.xlabel('Node Degree')
    plt.ylabel('Proportion of Nodes')
    plt.title('Degree Distribution of BTCtalk and BTC subreddit')
    plt.legend()
    plt.show()
    return
# Draw the X / Y series prepared before this point.
plt.loglog(X, Y, color='r', label='GitHub User-PR Network')
plt.xlabel('Node Degree (log)')
plt.ylabel('Proportion of Nodes with a Given Degree (log)')
plt.title('Degree Distribution of GitHub User-PR Network')
plt.legend()
plt.show()

# In- and out-degree counts (on pruned).
# In-degree: X holds the degrees, Y the node counts.
DegToCntV = snap.TIntPrV()
snap.GetInDegCnt(graph, DegToCntV)
X = [item.GetVal1() for item in DegToCntV]
Y = [item.GetVal2() for item in DegToCntV]

# Convert the counts into proportions of their sum.
total = float(sum(Y))
Y = [y / total for y in Y]

plt.loglog(X, Y, color='r', label='GitHub User-PR Network - In Degree')

# Out-degree: start again with empty series and a fresh vector.
X, Y = [], []
DegToCntV = snap.TIntPrV()
snap.GetOutDegCnt(graph, DegToCntV)
# 1.5 -- unique directed edges minus unique undirected edges.
reciprocatedEdges = (snap.CntUniqDirEdges(wikiGraph)
                     - snap.CntUniqUndirEdges(wikiGraph))
print("The number of reciprocated edges is %s." % reciprocatedEdges)

# 1.6
zeroOutDegreeNodes = snap.CntOutDegNodes(wikiGraph, 0)
print("The number of nodes of zero out-degree is %s." % zeroOutDegreeNodes)

# 1.7
zeroInDegreeNodes = snap.CntInDegNodes(wikiGraph, 0)
print("The number of nodes of zero in-degree is %s." % zeroInDegreeNodes)

# 1.8 -- add up the node counts of every out-degree above the boundary.
outDegreeToCount = snap.TIntPrV()
snap.GetOutDegCnt(wikiGraph, outDegreeToCount)
numNodesLargeOutDegree = 0
for item in outDegreeToCount:
    if item.GetVal1() > DEGREE_BOUNDARY:
        numNodesLargeOutDegree += item.GetVal2()
print("The number of nodes with more than %s outgoing edges is %s." % (
    DEGREE_BOUNDARY, numNodesLargeOutDegree))

# 1.9 -- add up the node counts of every in-degree below the boundary.
inDegreeCount = snap.TIntPrV()
snap.GetInDegCnt(wikiGraph, inDegreeCount)
numNodesSmallInDegree = 0
for item in inDegreeCount:
    if item.GetVal1() < DEGREE_BOUNDARY:
        numNodesSmallInDegree += item.GetVal2()
print("The number of nodes with less than %s incoming edges is %s." % (
    DEGREE_BOUNDARY, numNodesSmallInDegree))
Ec = 0 for EI in Graph.Edges(): Ec += 1 print Ec if SCCHashmap.IsKey(EI.GetSrcNId()) and InOutHashmap.IsKey(EI.GetDstNId()): Out += 1 elif InOutHashmap.IsKey(EI.GetSrcNId()) and SCCHashmap.IsKey( EI.GetDstNId()): In += 1 elif InOutHashmap.IsKey(EI.GetSrcNId()) and InOutHashmap.IsKey( EI.GetDstNId()): Tendrils += 1 print In, Out, Tendrils """
# NOTE(review): the text above appears to be the tail of a triple-quoted
# block opened outside this excerpt (disabled code); it is left untouched.

# Count edges that cross the boundary of MaxScc[0]:
#   Out -- source inside MaxScc[0], destination outside
#   In  -- source outside MaxScc[0], destination inside
In, Out = 0, 0
Ec = 0  # not updated anywhere in the visible code
for EI in Graph.Edges():
    if MaxScc[0].IsNIdIn(EI.GetSrcNId()) and not MaxScc[0].IsNIdIn(EI.GetDstNId()):
        Out += 1
        # Progress output: running total after every matching edge.
        print "Out: %d" % Out
    elif not MaxScc[0].IsNIdIn(EI.GetSrcNId()) and MaxScc[0].IsNIdIn(EI.GetDstNId()):
        In += 1
        print "In: %d" % In
# In-degree histogram of the whole graph as (degree, node count) pairs.
DegToCntV = snap.TIntPrV()
snap.GetInDegCnt(Graph, DegToCntV)
# NOTE(review): the next four statements use NI before the visible loop
# defines it; they look like the tail of a loop body whose header lies
# outside this excerpt -- confirm their nesting against the full file.
nodestatusList.append('B')
nodedegreeList.append(NI.GetOutDeg())
nodeaffectList.append(0)
nodeinitialList.append(0)
#print "Updated List : ", nodedegreeList
# Print the number of out-edges of the first node only (the loop breaks
# after one iteration).
for NI in G.Nodes():
    nid = NI.GetId()
    print len(list(NI.GetOutEdges()))
    break
# NOTE(review): this file is not closed anywhere in the visible code.
degreefile = open("degree_list.txt", "w")
# Undirected - Plot degree distribution and data
DegToCntV = snap.TIntPrV()
snap.GetInDegCnt(G, DegToCntV)
# One "<degree> <count>" line per histogram entry.
for p in DegToCntV:
    degreefile.write("%d %d\r\n" % (p.GetVal1(), p.GetVal2()))
# Same histogram as (count, degree) tuples -- note the swapped order.
in_degrees = [(item.GetVal2(), item.GetVal1()) for item in DegToCntV]
snap.PlotInDegDistr(G, "project_degree", "Undirected graph - degree Distribution", False, True)
#Initialize
# Mark 10000 distinct randomly chosen list positions as status 'A' and flag
# them in nodeinitialList; positions already 'A' are re-drawn.
# NOTE(review): 10000 and the 1..82168 range are hard-coded; presumably they
# match the size of the loaded graph -- confirm.
for i in range(10000):
    randomID = random.randint(1, 82168)
    while(nodestatusList[randomID]=="A"):
        randomID = random.randint(1, 82168)
    nodestatusList[randomID]='A'
    nodeinitialList[randomID]=1
#print "Updated List : ", nodestatusList
# Load the directed graph from the edge list.
G = snap.LoadEdgeList(snap.PNGraph, fname, col1, col2)
print("\ngraph nodes %d, edges %d" % (G.GetNodes(), G.GetEdges()))

# Weakly connected components: (size, number of components) pairs.
WccV = snap.TIntPrV()
snap.GetWccSzCnt(G, WccV)
print("\n# of connected component sizes %s" % WccV.Len())
for comp in WccV:
    size, components = comp.GetVal1(), comp.GetVal2()
    print("size %d, number of components %d" % (size, components))

# Largest weakly connected component.
MxWcc = snap.GetMxWcc(G)
print("\nmax wcc nodes %d, edges %d" % (MxWcc.GetNodes(), MxWcc.GetEdges()))

# In-degree histogram: (degree, number of nodes) pairs.
InDegCntV = snap.TIntPrV()
snap.GetInDegCnt(G, InDegCntV)
print("\n# of different in-degrees %s" % InDegCntV.Len())
for item in InDegCntV:
    degree, nodes = item.GetVal1(), item.GetVal2()
    print("in-degree %d, number of nodes %d" % (degree, nodes))

# Out-degree histogram: (degree, number of nodes) pairs.
OutDegCntV = snap.TIntPrV()
snap.GetOutDegCnt(G, OutDegCntV)
print("\n# of different out-degrees %s" % OutDegCntV.Len())
for item in OutDegCntV:
    degree, nodes = item.GetVal1(), item.GetVal2()
    print("out-degree %d, number of nodes %d" % (degree, nodes))

PRankH = snap.TIntFltH()
import snap
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import sys

# The graph file is the only command-line argument; fail with a usage
# message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s GRAPH_FILE' % sys.argv[0])
gfile = sys.argv[1]
print('Printing summary stats for file at:', gfile)

# '.graph' files are loaded as saved undirected snap graphs; anything else
# is read as an edge list using columns 0 and 1.
if gfile.endswith('.graph'):
    FIn = snap.TFIn(gfile)
    Network = snap.TUNGraph.Load(FIn)
else:
    Network = snap.LoadEdgeList(snap.PUNGraph, gfile, 0, 1)

snap.PrintInfo(Network)
print('Edges:', snap.CntUniqUndirEdges(Network))  # for directed graphs, should be same for undir

# Histogram entries are (degree, node count) pairs; add up the counts of
# every degree above 10.
DegToCntV = snap.TIntPrV()
snap.GetInDegCnt(Network, DegToCntV)
print('Nodes with deg > 10',
      sum(item.GetVal2() for item in DegToCntV if item.GetVal1() > 10))

ClustCoeff = snap.GetClustCf(Network, 10000)
print('Clustering coeff', ClustCoeff)
def degreeDistribution(graph):
    """Plot the total-degree distribution of ``graph`` with a least-squares
    power-law fit.

    Prints the extreme in-, out- and total degrees, fits a straight line to
    the log-log histogram (degrees below ``10**2.5`` only) and shows the
    histogram together with the fitted curve.

    Degree-0 entries stay in the plotted histogram but are excluded from the
    fit, because the fit takes ``log10`` of the degree.

    Args:
        graph: snap graph to analyse.
    """
    numNodes = float(graph.GetNodes())

    # In- and out-degree: only the extremes are reported.
    for label, fill in (('in', snap.GetInDegCnt), ('out', snap.GetOutDegCnt)):
        DegToCntV = snap.TIntPrV()
        fill(graph, DegToCntV)
        degrees = [item.GetVal1() for item in DegToCntV]
        # Single-argument print calls behave the same on Python 2 and 3.
        print('max %s degree: %s' % (label, max(degrees)))
        print('min %s degree: %s' % (label, min(degrees)))

    # Total-degree histogram as (degree, node count) pairs.
    DegToCntV = snap.TIntPrV()
    snap.GetDegCnt(graph, DegToCntV)
    histogram = [(item.GetVal1(), item.GetVal2()) for item in DegToCntV]
    x = [deg for deg, _ in histogram]
    y = [cnt / numNodes for _, cnt in histogram]
    print('max total degree: %s' % max(x))
    print('min total degree: %s' % min(x))

    # Least-squares fit: skip degree 0 (log10 undefined) and outliers.
    outLimit = 10**2.5
    fitPoints = [(deg, cnt / numNodes) for deg, cnt in histogram
                 if 0 < deg < outLimit]
    logDeg = [math.log10(float(deg)) for deg, _ in fitPoints]
    logFrq = [math.log10(float(frq)) for _, frq in fitPoints]
    fit = np.polyfit(logDeg, logFrq, deg=1)
    print('a: ' + str(fit[0]) + ', b: ' + str(fit[1]))
    x1 = np.linspace(1, 10**3, len(x))
    y1 = [i**fit[0] * 10**fit[1] for i in x1]

    plt.loglog(x, y, color='blue', ls='None', marker='.', label='total degree')
    plt.loglog(x1, y1, color='red', ls='solid', marker='None', label='total degree lse')
    plt.xlabel('node degree')
    plt.ylabel('proportion of nodes')
    plt.title('Degree distribution of btctalk and btc subreddit')
    plt.legend()
    plt.show()
    return
import snap
from math import floor
from itertools import islice, cycle

#Problem 1
g = snap.LoadEdgeList(snap.PNGraph, "p2p-Gnutella08.txt", 0, 1)

#1.a-e
# PrintInfo writes the summary to a file, which is then echoed line by line.
info_filename = "gnutella_info.txt"
snap.PrintInfo(g, 'Gnutella P2P network 2008', info_filename, False)
with open(info_filename, 'r') as inf:
    for line in inf:
        print(line)

#Below addresses 1.f,g
g_outdeg = snap.TFltPr64V()
g_indeg = snap.TFltPr64V()
snap.GetOutDegCnt(g, g_outdeg)
snap.GetInDegCnt(g, g_indeg)

# Each histogram entry is a pair: Val1 is the degree, Val2 the number of
# nodes with that degree. Select on the degree and add up the node counts;
# the number of selected entries is not the number of nodes.
outdeg_gt_10 = [pair for pair in g_outdeg if pair.GetVal1() > 10]
indeg_gt_10 = [pair for pair in g_indeg if pair.GetVal1() > 10]
num_outdeg_gt_10 = int(sum(pair.GetVal2() for pair in outdeg_gt_10))
num_indeg_gt_10 = int(sum(pair.GetVal2() for pair in indeg_gt_10))
print(f'Nodes with outdegree > 10: {num_outdeg_gt_10}')
print(f'Nodes with indegree > 10: {num_indeg_gt_10}')

#Problem 2
so = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt")

#2.1
so_wcc = snap.TCnComV()
snap.GetWccs(so, so_wcc)
print(f'# of connected components: {len(so_wcc)}')

#2.2
so_mx_wcc = snap.GetMxWcc(so)
snap.PrintInfo(so_mx_wcc, "Largest connected component of StackOverflow-Java")

#2.3