def solve_degree_based_questions(G, GName):
    """Print degree statistics for G and plot its out-degree distribution.

    Reports the number of nodes with out-degree 7, the ids of the nodes whose
    out-degree equals the largest one found, and the name of the plot file.

    Args:
        G: SNAP graph to analyse.
        GName: graph file name; its last 10 characters are dropped to build
            the label used in the messages and in the plot file name.
    """
    label = GName[:-10]

    # (out-degree, number of nodes) pairs, one per out-degree present in G.
    degree_pairs = snap.TIntPrV()
    snap.GetOutDegCnt(G, degree_pairs)

    # Count of nodes whose out-degree is exactly seven; 0 when no pair matches.
    seven_count = next(
        (pair.GetVal2() for pair in degree_pairs if pair.GetVal1() == 7), 0)
    print("Number of nodes with degree=7 in %s: %d" % (label, seven_count))

    # The last pair is taken as the maximum, i.e. this relies on the vector
    # being ordered by increasing degree.
    top_degree = degree_pairs[len(degree_pairs) - 1].GetVal1()
    top_ids = [str(node.GetId()) for node in G.Nodes()
               if node.GetOutDeg() == top_degree]
    print("Node id (s) with highest degree in {0}: {1}".format(
        label, ",".join(top_ids)))

    # Plot the out-degree distribution and report the expected file name.
    plot_file = "outDeg." + label + ".png"
    snap.PlotOutDegDistr(G, label, label + " - out-degree Distribution")
    print("Degree distribution of {0} is in: {1}".format(label, plot_file))
def get_out_dists(G):
    """Return the out-degree distribution of G in several forms.

    Args:
        G: SNAP graph.

    Returns:
        Tuple ``(deg_counts, degs, cdf, ccdf, pdf)``:
        deg_counts -- array with the number of nodes per distinct out-degree;
        degs       -- array of the out-degrees, aligned with ``deg_counts``;
        cdf        -- cumulative sum of ``pdf`` with a leading 0, so it has
                      one more element than ``degs``;
        ccdf       -- ``1 - cdf``;
        pdf        -- ``deg_counts`` normalised to sum to 1.

    Raises:
        ValueError: if the graph yields no (degree, count) pairs.
    """
    deg_vect = snap.TIntPrV()
    snap.GetOutDegCnt(G, deg_vect)

    pairs = [(item.GetVal1(), item.GetVal2()) for item in deg_vect]
    if not pairs:
        # Normalising an empty distribution is undefined; fail explicitly.
        raise ValueError("graph has no out-degree distribution (no nodes)")

    degs = np.asarray([deg for deg, _ in pairs])
    deg_counts = np.asarray([cnt for _, cnt in pairs])

    pdf = deg_counts.astype(float) / deg_counts.sum()
    # Leading 0 so that cdf[i] is the mass strictly before degs[i].
    cdf = np.insert(np.cumsum(pdf), 0, 0)
    ccdf = 1 - cdf
    return deg_counts, degs, cdf, ccdf, pdf
def OutDeg(graph):
    """Plot the out-degree distribution of graph and save it as a PNG.

    The figure (degree on x, node count on a log-scaled y axis) is written to
    ``temp/outdegdistr.png``; the ``temp`` directory is not created here.

    Args:
        graph: SNAP graph.
    """
    outdir = 'temp'
    out_arr = snap.TIntPrV()
    snap.GetOutDegCnt(graph, out_arr)
    # Column 0: out-degree, column 1: number of nodes with that degree.
    tmp_arr = np.array([(item.GetVal1(), item.GetVal2()) for item in out_arr])
    max_deg = tmp_arr[:, 0].max()
    out_fname = os.path.join(outdir, 'outdegdistr.png')

    plt.clf()
    plt.figure(1)
    plt.subplots_adjust(left=0.075, bottom=0.075, right=1., top=1.,
                        wspace=0., hspace=0.)
    plt.plot(tmp_arr[:, 0], tmp_arr[:, 1], '-x')
    plt.yscale('log')
    if max_deg > MAX_XTICKS_NUM:
        # Floor division keeps the tick step an integer on Python 3 as well.
        skip = int(max_deg) // MAX_XTICKS_NUM
        plt.xticks(np.arange(0, max_deg + 1 + skip, skip))
    else:
        plt.xticks(np.arange(max_deg + 1))
    plt.xlim(0, max_deg)
    plt.ylim(0, tmp_arr[:, 1].max())
    plt.xlabel('Out-degrees', fontsize=16)
    plt.ylabel('Number of nodes', fontsize=16)
    plt.grid(True)
    plt.savefig(out_fname, dpi=300, format='png')
def avgDegreeDist(family, direction, numSamples, apiGraph):
    """Average the degree histogram over a random sample of a family's graphs.

    Args:
        family: sub-directory of ``data/graphs/`` holding the graph files.
        direction: ``'in'`` for in-degree; any other value selects out-degree.
        numSamples: maximum number of graph files to sample.
        apiGraph: if true, load binary ``.apigraph`` files as ``TNEANet``;
            otherwise load ``.edges`` edge lists.

    Returns:
        numpy array where element ``d`` is the mean number of nodes of degree
        ``d`` over the graphs that were actually loaded.

    Raises:
        ValueError: if no matching graph file is available to sample.
    """
    path = 'data/graphs/' + family + '/'
    suffix = '.apigraph' if apiGraph else '.edges'
    # A real list is required: random.shuffle cannot shuffle a filter object.
    graph_files = [name for name in os.listdir(path) if suffix in name]
    random.shuffle(graph_files)
    sample = graph_files[:numSamples]
    if not sample:
        raise ValueError("no '%s' graph files to sample in %s" % (suffix, path))

    if apiGraph:
        Gs = [snap.TNEANet.Load(snap.TFIn(path + f)) for f in sample]
    else:
        Gs = [snap.LoadEdgeList(snap.PNEANet, path + f, 0, 1) for f in sample]

    use_in = direction == 'in'
    if use_in:
        maxdeg = max(G.GetNI(snap.GetMxInDegNId(G)).GetInDeg() for G in Gs)
    else:
        maxdeg = max(G.GetNI(snap.GetMxOutDegNId(G)).GetOutDeg() for G in Gs)

    avg_deg_dist = np.zeros(maxdeg + 1)
    for G in Gs:
        DegToCntV = snap.TIntPrV()
        if use_in:
            snap.GetInDegCnt(G, DegToCntV)
        else:
            snap.GetOutDegCnt(G, DegToCntV)
        for item in DegToCntV:
            avg_deg_dist[item.GetVal1()] += item.GetVal2()

    # Divide by the number of graphs loaded, which can be below numSamples
    # when the directory holds fewer matching files.
    return avg_deg_dist / len(Gs)
def plotDegreeDistribution(G):
    """Show a log-log scatter plot of the out-degree distribution of G.

    Args:
        G: SNAP graph.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    #
    # Get Degree Distribution
    #
    OutDegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, OutDegToCntV)
    degreeList = [item.GetVal1() for item in OutDegToCntV]
    nodeList = [item.GetVal2() for item in OutDegToCntV]

    #
    # Plot Degree Distribution
    #
    plt.figure(figsize=(15, 15))
    # Use the pyplot namespace explicitly instead of a bare loglog name.
    plt.loglog(degreeList, nodeList, 'bo')
    plt.title("LogLog plot of out-degree distribution")
    plt.show()
    return
def f():
    """Return the out-degree distribution of ``self.graph`` as a list.

    Each element is a ``(Val1, Val2)`` tuple read from the vector filled by
    ``GetOutDegCnt``. ``self`` comes from the enclosing scope.
    """
    snap_module = self.snap
    pairs = snap_module.TFltPr64V()
    snap_module.GetOutDegCnt(self.graph, pairs)
    return [(pair.GetVal1(), pair.GetVal2()) for pair in pairs]
def get_out_degree_distribution(Graph):
    """Return the normalised out-degree distribution of Graph.

    Pairs with degree 0 or count 0 are skipped.

    Args:
        Graph: SNAP graph.

    Returns:
        ``(XO, YO)``: list of out-degrees and, aligned with it, the fraction
        of all nodes having that out-degree.
    """
    # Create the result vector locally instead of relying on an outer name.
    DegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(Graph, DegToCntV)
    num_node = Graph.GetNodes()
    XO, YO = [], []
    for item in DegToCntV:
        deg, cnt = item.GetVal1(), item.GetVal2()
        if deg == 0 or cnt == 0:
            continue
        XO.append(deg)
        YO.append(cnt * 1.0 / num_node)
    return XO, YO
def getDataPointsToPlot(Graph, degType):
    """
    :param - Graph: snap graph whose degree distribution is computed
    :param - degType: "In", "Out" or "Total"

    return values:
    X: tuple of degrees, in increasing order
    Y: tuple of frequencies: Y[i] = fraction of nodes with degree X[i]

    raises ValueError when degType is none of the three accepted values
    """
    ############################################################################
    DegToCntV = snap.TIntPrV()
    if degType == "In":
        snap.GetInDegCnt(Graph, DegToCntV)
    elif degType == "Out":
        snap.GetOutDegCnt(Graph, DegToCntV)
    elif degType == "Total":
        snap.GetDegCnt(Graph, DegToCntV)
    else:
        raise ValueError("Invalid degree type: please use 'In', 'Out' or 'Total'.")

    NumNodes = Graph.GetNodes()
    DegToFrqV = { item.GetVal1() : float(item.GetVal2())/NumNodes for item in DegToCntV }
    DegToFrqV = sorted(DegToFrqV.items())
    if not DegToFrqV:
        # zip(*[]) yields nothing to unpack; return an empty distribution.
        return (), ()
    X, Y = zip(*DegToFrqV)
    ############################################################################
    return X, Y


def plot_graph(name):
    """Plot the in, out and total degree distributions of one network.

    :param - name: network name passed to load_graph and used in the title
    """
    G = load_graph(name)
    print("{} graph nodes: {}".format(name, G.GetNodes()))
    print("{} graph edges: {}".format(name, G.GetEdges()))

    x_in, y_in = getDataPointsToPlot(G, 'In')
    plt.loglog(x_in, y_in, marker=',', color = 'y', label = 'In Degree')

    # The result must be bound to y_out, the name used by the plot call below.
    x_out, y_out = getDataPointsToPlot(G, 'Out')
    plt.loglog(x_out, y_out, marker=',', color = 'r', label = 'Out Degree')

    x_total, y_total = getDataPointsToPlot(G, 'Total')
    plt.loglog(x_total, y_total, marker=',', color = 'b', label = 'Total Degree') #linestyle = 'dotted'

    plt.xlabel('Node Degree (log)')
    plt.ylabel('Proportion of Nodes with a Given Degree (log)')
    plt.title('Degree Distribution of In, Out, and Total degree for {} network'.format(name))
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # Plot distribution graphs for RT, MT, RE, Social networks
    plot_graph("retweet")
    plot_graph("mention")
    plot_graph("reply")
    plot_graph("social")
def Get_Out_Degree_Distribution(G): Deg_dist = snap.TIntPrV() snap.GetOutDegCnt(G, Deg_dist) degree = np.empty((1, 0)) count = np.empty((1, 0)) for node_degree_pr in Deg_dist: if node_degree_pr.GetVal1() > 0: degree = np.append(degree, node_degree_pr.GetVal1()) count = np.append(count, node_degree_pr.GetVal2()) '''
def plot_degree_distribution(G, name):
    """Scatter-plot the out-degree distribution of G, save it and show it.

    Args:
        G: SNAP graph.
        name: label used in the plot title and in the output file name
            (``../analysis/<name>_DegDistr``).
    """
    out_path = '../analysis/' + name + '_DegDistr'
    plot_title = name + ': Degree Distribution'

    pairs = snap.TIntPrV()
    snap.GetOutDegCnt(G, pairs)
    # Degrees on x, number of nodes with that degree on y.
    X = [pair.GetVal1() for pair in pairs]
    Y = [pair.GetVal2() for pair in pairs]

    plt.xlabel('Node Degree')
    plt.ylabel('Number of Nodes with a Given Degree')
    plt.title(plot_title)
    plt.plot(X, Y, 'ro')
    plt.savefig(out_path)
    plt.show()
def getInDegDistr(G, outdeg):
    """Return the normalised in- or out-degree distribution of G.

    Args:
        G: SNAP graph.
        outdeg: if true use out-degrees, otherwise in-degrees.

    Returns:
        ``(degrees, counts)``: degrees in increasing order and, aligned with
        them, the proportion of all nodes having that degree.
    """
    histogram = snap.TIntPrV()
    counter = snap.GetOutDegCnt if outdeg else snap.GetInDegCnt
    counter(G, histogram)

    # Order the (degree, node count) pairs by degree.
    ordered = sorted(((p.GetVal1(), p.GetVal2()) for p in histogram),
                     key=lambda entry: entry[0])

    total_nodes = G.GetNodes()
    degrees = [degree for degree, _ in ordered]
    # Normalise each node count by the total number of nodes.
    counts = [1.0 * node_count / total_nodes for _, node_count in ordered]
    return (degrees, counts)
def get_deg_dist(g):
    """Return the out- and in-degree distributions of graph g as one DataFrame.

    The frame has columns ``deg``, ``cnt`` and ``type``; ``type`` is
    ``'out_deg'`` for the first group of rows and ``'in_deg'`` for the second.
    """
    frames = []
    for fill_counts, kind in ((snap.GetOutDegCnt, 'out_deg'),
                              (snap.GetInDegCnt, 'in_deg')):
        pairs = snap.TIntPrV()
        fill_counts(g, pairs)
        frame = pd.DataFrame([(pair.GetVal1(), pair.GetVal2()) for pair in pairs],
                             columns=["deg", "cnt"])
        frame['type'] = kind
        frames.append(frame)
    return pd.concat(frames)
def getDataPointsToPlot(Graph):
    """
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return values:
    X: list of degrees
    Y: list of frequencies: Y[i] = fraction of nodes with degree X[i]
    """
    ############################################################################
    X, Y = [], []
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(Graph, CntV)
    # Normalise by the node count so that Y holds fractions, as documented,
    # rather than raw node counts.
    num_nodes = float(Graph.GetNodes())
    for p in CntV:
        X.append(p.GetVal1())
        Y.append(p.GetVal2() / num_nodes)
    ############################################################################
    return X, Y
def getDataPointsToPlot(self, Graph):
    """
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return values:
    X: list of degrees
    Y: list of frequencies: Y[i] = fraction of nodes with degree X[i]
    """
    ############################################################################
    pairs = snap.TIntPrV()
    snap.GetOutDegCnt(Graph, pairs)

    X, node_counts = [], []
    for pair in pairs:
        X.append(pair.GetVal1())
        node_counts.append(pair.GetVal2())

    # The counts sum to the number of nodes covered by the distribution.
    total = 1.0 * sum(node_counts)
    Y = [node_count / total for node_count in node_counts]
    ############################################################################
    return X, Y
# 1.5: difference between the unique directed and unique undirected edge counts
print("The number of reciprocated edges is %s." % (
    snap.CntUniqDirEdges(wikiGraph) - snap.CntUniqUndirEdges(wikiGraph)))

# 1.6
print("The number of nodes of zero out-degree is %s." % (
    snap.CntOutDegNodes(wikiGraph, 0)))

# 1.7
print("The number of nodes of zero in-degree is %s." % (
    snap.CntInDegNodes(wikiGraph, 0)))

# 1.8: add up the node counts of every out-degree above the boundary
outDegreeToCount = snap.TIntPrV()
snap.GetOutDegCnt(wikiGraph, outDegreeToCount)
numNodesLargeOutDegree = sum(
    item.GetVal2()
    for item in outDegreeToCount
    if item.GetVal1() > DEGREE_BOUNDARY)
print("The number of nodes with more than %s outgoing edges is %s." % (
    DEGREE_BOUNDARY, numNodesLargeOutDegree))

# 1.9: add up the node counts of every in-degree below the boundary
inDegreeCount = snap.TIntPrV()
snap.GetInDegCnt(wikiGraph, inDegreeCount)
numNodesSmallInDegree = sum(
    item.GetVal2()
    for item in inDegreeCount
    if item.GetVal1() < DEGREE_BOUNDARY)
print("The number of nodes with less than %s incoming edges is %s." % (
    DEGREE_BOUNDARY, numNodesSmallInDegree))
def basic_analysis():
    """Print and plot basic statistics of the PH graph next to a random graph.

    Loads the undirected graph stored in ``../graphs/ph_simple.graph``,
    generates a random graph (``snap.GenRndGnm``) with the same number of
    nodes and edges, and reports for both: clustering coefficient, degree
    distribution plot, maximum degree, diameter, triads and degree centrality.
    The average degree is computed for the loaded graph only.
    """
    FIn = snap.TFIn("../graphs/ph_simple.graph")
    G = snap.TUNGraph.Load(FIn)
    numNodes = G.GetNodes()
    print "num nodes: ", numNodes
    numEdges = G.GetEdges()
    print "num edges: ", numEdges

    # clustering coefficient
    print "\nclustering coefficient"
    print "Clustering G: ", snap.GetClustCf(G)
    # Random reference graph with the same node and edge counts as G.
    ER = snap.GenRndGnm(snap.PUNGraph, numNodes, numEdges)
    print "Clustering ER: ", snap.GetClustCf(ER)

    # degree distribution histogram
    print "\ndegree distribution histogram"
    x_erdosRenyi, y_erdosRenyi = getDataPointsToPlot(ER)
    plt.loglog(x_erdosRenyi, y_erdosRenyi, color = 'g', label = 'Erdos Renyi Network')
    x_smallWorld, y_smallWorld = getDataPointsToPlot(G)
    plt.loglog(x_smallWorld, y_smallWorld, linestyle = 'dashed', color = 'b', label = 'PH Agency Network')
    plt.xlabel('Node Degree (log)')
    plt.ylabel('Proportion of Nodes with a Given Degree (log)')
    plt.title('Degree Distribution of Erdos Renyi and PH Agency Network')
    plt.legend()
    plt.show()

    # degree
    print "\ndegree distribution"
    # Sum of degree * (number of nodes with that degree) over G.
    deg_sum = 0.0
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, CntV)
    for p in CntV:
        deg_sum += p.GetVal1() * p.GetVal2()
    max_node = G.GetNI(snap.GetMxDegNId(G))
    deg_sum /= float(numNodes)
    print "average degree: ", deg_sum # same for G and ER
    print "max degree: ", max_node.GetOutDeg(), ", id: ", max_node.GetId()
    # NOTE(review): deg_sum is reset here but not read again in this function.
    deg_sum = 0.0
    # Second "max degree" line refers to the random graph ER.
    max_node = ER.GetNI(snap.GetMxDegNId(ER))
    print "max degree: ", max_node.GetOutDeg(), ", id: ", max_node.GetId()

    # diameter
    print "\ndiameter"
    diam = snap.GetBfsFullDiam(G, 10)
    print "Diameter: ", diam
    print "ER Diameter: ", snap.GetBfsFullDiam(ER, 10)

    # triads
    print "\ntriads"
    print "Triads: ", snap.GetTriads(G)
    print "ER Triads: ", snap.GetTriads(ER)

    # centrality
    print "\ncentrality"
    # Track the node with the largest degree centrality in G and collect all
    # values for the average.
    max_dc = 0.0
    maxId = -1
    all_centr = []
    for NI in G.Nodes():
        DegCentr = snap.GetDegreeCentr(G, NI.GetId())
        all_centr.append(DegCentr)
        if DegCentr > max_dc:
            max_dc = DegCentr
            maxId = NI.GetId()
    print "max"
    print "node: %d centrality: %f" % (maxId, max_dc)
    print "average centrality: ", np.mean(all_centr)

    # Same computation for the random graph ER.
    print "ER"
    max_dc = 0.0
    maxId = -1
    all_centr = []
    for NI in ER.Nodes():
        DegCentr = snap.GetDegreeCentr(ER, NI.GetId())
        all_centr.append(DegCentr)
        if DegCentr > max_dc:
            max_dc = DegCentr
            maxId = NI.GetId()
    print "max"
    print "node: %d centrality: %f" % (maxId, max_dc)
    print "average centrality: ", np.mean(all_centr)
def degreeDistribution(graph):
    """Plot the total-degree distribution of graph with two power-law fits.

    Prints the min/max in-, out- and total degree, fits a line to the
    log10-log10 distribution (least squares, degrees below ``10**2.5`` only),
    computes a maximum log-likelihood exponent, and shows a log-log plot of
    the distribution together with both fitted curves.

    Args:
        graph: SNAP graph.
    """
    numNodes = float(graph.GetNodes())

    # in degree dist
    DegToCntV = snap.TIntPrV()
    snap.GetInDegCnt(graph, DegToCntV)
    xIn = []
    yIn = []
    for item in DegToCntV:
        xIn.append(item.GetVal1())
        yIn.append(item.GetVal2() / numNodes)
    print 'max in degree:', max(xIn)
    print 'min in degree:', min(xIn)

    # out degree dist
    DegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(graph, DegToCntV)
    xOut = []
    yOut = []
    for item in DegToCntV:
        xOut.append(item.GetVal1())
        yOut.append(item.GetVal2() / numNodes)
    print 'max out degree:', max(xOut)
    print 'min out degree:', min(xOut)

    # degree dist
    DegToCntV = snap.TIntPrV()
    snap.GetDegCnt(graph, DegToCntV)
    x = []
    y = []
    x1 = [] # after pruning outliers
    y1 = [] # after pruning outliers
    outLimit = 10**2.5 # 2.5 for prelim
    for item in DegToCntV:
        x.append(item.GetVal1())
        y.append(item.GetVal2() / numNodes)
        if item.GetVal1() < outLimit:
            x1.append(item.GetVal1())
            y1.append(item.GetVal2() / numNodes)
    # NOTE(review): xMin is the smallest degree minus 0.5, and that shifted
    # value is what gets printed as 'min total degree' below.
    xMin = min(x) - 0.5
    print 'max total degree:', max(x)
    print 'min total degree:', xMin

    # test
    # DegToCntV2 = snap.TIntPrV()
    # snap.GetDegCnt(graph2, DegToCntV2)
    # xG = []
    # yG = []
    # for item in DegToCntV2:
    #     xG.append(item.GetVal1())
    #     yG.append(item.GetVal2() / float(graph2.GetNodes()))
    # print xG
    # print yG
    # exit(1)

    # lse
    # NOTE(review): math.log10 raises for 0, so this assumes no degree-0
    # entry is present in x1 - confirm for graphs with isolated nodes.
    x1 = [math.log10(float(i)) for i in x1]
    y1 = [math.log10(float(i)) for i in y1]
    fit = np.polyfit(x1, y1, deg=1)
    print 'a: ' + str(fit[0]) + ', b: ' + str(fit[1])
    # x1/y1 are reused from here on for the fitted curve, not the data points.
    x1 = np.linspace(1, 10**4, len(x))
    y1 = [i**fit[0] * 10**fit[1] for i in x1]

    #
    # print len(x)
    # print np.dot(x, y)
    # print graph.GetNodes()
    # exit(1)
    m = graph.GetNodes()
    # todo try dict of x, y

    # mlle
    # for each x, sum over it y times where y is the num of occurrences (not proportion)
    # y holds proportions, so y[...] * m turns them back into node counts.
    alphaMLLE = 1 + (graph.GetNodes() / (sum([np.log(i / xMin) * y[x.index(i)] * m for i in x])))
    print alphaMLLE
    x2 = np.linspace(1, 10**4, len(x))
    y2 = [((alphaMLLE - 1) / xMin) * ((i / xMin)**(-1 * alphaMLLE)) for i in x2]

    # Second estimate, using log(degree) directly instead of log(degree / xMin).
    dSum = 0
    numSamples = m
    for key in x:
        dSum += np.log(key) * y[x.index(key)] * m
    mlle = 1 + numSamples / float(dSum)
    print mlle

    # theoretical power pdf
    # NOTE(review): yPdf is only referenced by the commented-out plot call below.
    yPdf = [1 / float(i**2) for i in x2]

    # plot
    # plt.loglog(xIn, yIn, color='black', ls='None', marker='.', label='in degree')
    # plt.loglog(xOut, yOut, color='red', ls='None', marker='.', label='out degree')
    plt.loglog(x, y, color='blue', ls='None', marker='.', label='Degree Distribution')
    plt.loglog(x1, y1, color='red', ls='solid', marker='None', label='Least Squares Estimate')
    plt.loglog(x2, y2, color='green', ls='solid', marker='None', label='Max Log-Likelihood Estimate')
    # plt.loglog(xG, yG, color='black', ls='None', marker='.', label='generated power dist')
    # plt.loglog(x2, yPdf, color='black', ls='solid', marker='None', label='theoretical power law pdf')
    plt.xlabel('Node Degree')
    plt.ylabel('Proportion of Nodes')
    plt.title('Degree Distribution of BTCtalk and BTC subreddit')
    plt.legend()
    plt.show()
    return
pass try: G.AddNode(node2) except: pass G.AddEdge(node1, node2) fd_in.close() # Output Sentences print("Number of nodes: {}".format(G.GetNodes())) print("Number of edges: {}".format(G.GetEdges())) # [2] Degree of nodes in the network DegToCnt = snap.TIntPrV() snap.GetOutDegCnt(G, DegToCnt) degree_count = {} for item in DegToCnt: degree_count[item.GetVal1()] = item.GetVal2() OutDeg = snap.TIntPrV() snap.GetNodeOutDegV(G, OutDeg) node_deg = {} for item in OutDeg: node_deg[item.GetVal1()] = item.GetVal2() max_deg_nodes = [k for k, v in node_deg.items() if v == max(node_deg.values())] # Output sentences print("Number of nodes with degree=7: {}".format(snap.CntOutDegNodes(G, 7))) print("Node id(s) with highest degree: ", end=" ")
def get_graph_overview(G, Gd=None): ''' G here is an undirected graph ''' # degree distribution CntV = snap.TIntPrV() snap.GetOutDegCnt(G, CntV) deg_x, deg_y = [], [] max_deg = 0 for item in CntV: max_deg = max(max_deg, item.GetVal1()) deg_x.append(item.GetVal1()) deg_y.append(item.GetVal2()) # print item.GetVal1(), item.GetVal2() print 'max_deg = ', max_deg deg_cnt = np.zeros(max_deg + 1) for item in CntV: deg_cnt[item.GetVal1()] = item.GetVal2() print deg_cnt # plt.loglog(deg_x, deg_y) # plt.xlabel('Degree of nodes') # plt.ylabel('Number of nodes') # plt.savefig('Giu_deg_dist.png') # plt.clf() # clustering coefficient distribution cf = snap.GetClustCf(G) print 'average cf =', cf NIdCCfH = snap.TIntFltH() snap.GetNodeClustCf(G, NIdCCfH) ccf_sum = np.zeros(max_deg + 1) for item in NIdCCfH: ccf_sum[G.GetNI(item).GetDeg()] += NIdCCfH[item] # print item, NIdCCfH[item] ccf_x, ccf_y = [], [] for i in range(max_deg + 1): if deg_cnt[i] != 0: ccf_sum[i] /= deg_cnt[i] ccf_x.append(i) ccf_y.append(ccf_sum[i]) print ccf_y # plt.loglog(ccf_x, ccf_y) # plt.xlabel('Degree of nodes') # plt.ylabel('Average clustering coefficient of nodes with the degree') # plt.savefig('Giu_ccf_dist.png') # plt.clf() # snap.PlotClustCf(G, 'investor_network', 'Distribution of clustering coefficients') # diameter and shortest path distribution diam = snap.GetBfsFullDiam(G, 100) print diam # snap.PlotShortPathDistr(G, 'investor_network', 'Distribution of shortest path length') # rewired_diams = [] # for i in range(100): # print 'rewire: ', i # G_config = rewire_undirected_graph(G) # rewired_diams.append(snap.GetBfsFullDiam(G_config, 400)) # print rewired_diams # print 'null model diam mean: ', np.mean(rewired_diams) # print 'null model diam std: ', np.std(rewired_diams) # wcc and scc size distribution WccSzCnt = snap.TIntPrV() snap.GetWccSzCnt(G, WccSzCnt) print 'Distribution of wcc:' for item in WccSzCnt: print item.GetVal1(), item.GetVal2() if Gd != None: print 'Distribution of scc:' ComponentDist 
= snap.TIntPrV() snap.GetSccSzCnt(Gd, ComponentDist) for item in ComponentDist: print item.GetVal1(), item.GetVal2()
snap.PlotInDegDistr(G1, "Indeg", "Directed graph - in-degree")
snap.PlotOutDegDistr(G1, "Outdeg", "Directed graph - out-degree")

# vector of pairs of integers (size, count)
ComponentDist = snap.TIntPrV()
# get distribution of connected components (component size, count)
snap.GetWccSzCnt(G1, ComponentDist)
for comp in ComponentDist:
    print("Size: %d - Number of Components: %d" % (comp.GetVal1(), comp.GetVal2()))

Count = snap.CntUniqDirEdges(G1)
print("Directed Graph: Count of unique directed edges is %d" % Count)

# get degree distribution pairs (degree, count); the same vector is reused
snap.GetOutDegCnt(G1, ComponentDist)
print("Degree Distribution Pairs-")
xval = []
yval = []
for item in ComponentDist:
    print("%d nodes with out-degree %d" % (item.GetVal2(), item.GetVal1()))
    xval.append(item.GetVal1())
    yval.append(item.GetVal2())

# One bar per out-degree, with height equal to the number of nodes of that
# degree. A histogram of yval with xval as bin edges would instead bin the
# node counts themselves, which is not what the axis labels describe.
plt.bar(xval, yval, alpha=0.5, label='Nodes with Out degree')
plt.title('Distribution of Out degree by Nodes')
plt.xlabel('Out degree')
plt.ylabel('Number of Nodes')
plt.xticks(xval, rotation=90)
plt.show()
# remove the nodes of G8 that have out-degree 3 and in-degree 2
snap.DelDegKNodes(G8, 3, 2)

# directed random graph with 10k nodes and 1k edges
G9 = snap.GenRndGnm(snap.PNGraph, 10000, 1000)
print("G9: Nodes %d, Edges %d" % (G9.GetNodes(), G9.GetEdges()))

# vector of integer pairs, first filled with the distribution of connected
# components as (component size, count)
CntV = snap.TIntPrV()
snap.GetWccSzCnt(G9, CntV)
for p in CntV:
    print("size %d: count %d" % (p.GetVal1(), p.GetVal2()))

# same vector, now filled with (out-degree, count) pairs
snap.GetOutDegCnt(G9, CntV)
for p in CntV:
    print("degree %d: count %d" % (p.GetVal1(), p.GetVal2()))

# Preferential Attachment graph with 100 nodes and out-degree 3
G10 = snap.GenPrefAttach(100, 3)
print("G10: Nodes %d, Edges %d" % (G10.GetNodes(), G10.GetEdges()))

# vector of floats receiving the first eigenvector of the adjacency matrix
EigV = snap.TFltV()
snap.GetEigVec(G10, EigV)
# print the components numbered from 1
nr = 0
for nr, f in enumerate(EigV, 1):
    print("%d: %.6f" % (nr, f))
import snap
from math import floor
from itertools import islice, cycle

#Problem 1
g = snap.LoadEdgeList(snap.PNGraph, "p2p-Gnutella08.txt", 0, 1)

#1.a-e
# Write the graph summary to a file, then echo that file.
info_filename = "gnutella_info.txt"
snap.PrintInfo(g, 'Gnutella P2P network 2008', info_filename, False)
with open(info_filename, 'r') as inf:
    for line in inf:
        print(line)

#Below addresses 1.f,g
g_outdeg = snap.TFltPr64V()
g_indeg = snap.TFltPr64V()
snap.GetOutDegCnt(g, g_outdeg)
snap.GetInDegCnt(g, g_indeg)

#g_outdeg is a vector of pairs of floats. Each pair is addressed like (Val1,Val2)
# Val1 is the degree and Val2 the number of nodes with that degree, so the
# degree threshold is tested on Val1 and the node total is the sum of Val2.
outdeg_gt_10 = [pair for pair in g_outdeg if pair.GetVal1() > 10]
indeg_gt_10 = [pair for pair in g_indeg if pair.GetVal1() > 10]
print(f'Nodes with outdegree > 10: {int(sum(pair.GetVal2() for pair in outdeg_gt_10))}')
print(f'Nodes with indegree > 10: {int(sum(pair.GetVal2() for pair in indeg_gt_10))}')

#Problem 2
so = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt")

#2.1
so_wcc = snap.TCnComV()
snap.GetWccs(so, so_wcc)
print(f'# of connected components: {len(so_wcc)}')

#2.2
so_mx_wcc = snap.GetMxWcc(so)
snap.PrintInfo(so_mx_wcc, "Largest connected component of StackOverflow-Java")

#2.3
import snap
import numpy as np
import matplotlib.pyplot as plt

# P2 of HW1: log-log scatter of the out-degree distribution of wiki-Vote
G1 = snap.LoadEdgeList(snap.PNGraph, "wiki-Vote.txt", 0, 1)

CntV = snap.TIntPrV()
snap.GetOutDegCnt(G1, CntV)

# out-degree -> number of nodes with that out-degree
degs = {p.GetVal1(): p.GetVal2() for p in CntV}
ps = sorted(degs.items())
x_values = [k for (k, v) in ps]
y_values = [v for (k, v) in ps]

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(x_values, y_values)
ax.set_xscale('log')
ax.set_yscale('log')
plt.xlim((1e-1, 1e3))
plt.ylim((1e-1, 1e4))
plt.title("Distribution of out-degree of nodes in the network")
fig.savefig("p2.png")
snap.DrawGViz(u_rndm_graph, snap.gvlNeato, "graph_rdm_undirected.png", "Undirected Random Graph", True)

# Plot the out degree distrib
snap.PlotOutDegDistr(u_rndm_graph, "graph_rdm_undirected", "Undirected graph - out-degree Distribution")

# Compute and print the list of all edges
for vertex_in in u_rndm_graph.Nodes():
    for vertex_out_id in vertex_in.GetOutEdges():
        print("edge (%d %d)" % (vertex_in.GetId(), vertex_out_id))

# Save it to an external file
snap.SaveEdgeList(u_rndm_graph, "Rndm_graph.txt", "Save as tab-separated list of edges")

# Compute degree distribution and save it to an external textfile
degree_vertex_count = snap.TIntPrV()
# The module is referenced as "snap" everywhere else in this script.
snap.GetOutDegCnt(u_rndm_graph, degree_vertex_count)
# The with-block closes the file even if a write fails.
with open("graph_rdm_undirected_degree_distrib.txt", "w") as degree_file:
    degree_file.write("#----------------------------------\n")
    degree_file.write("# Degree Distribution \n")
    degree_file.write("#----------------------------------\n")
    degree_file.write("\n")
    for pairs in degree_vertex_count:
        degree_file.write("vertex degree %d: nmbr vertices with such degree %d \n" % (pairs.GetVal1(), pairs.GetVal2()))

# Compute the sizes of the connected component and save it to an external file
Components = snap.TCnComV()
snap.GetSccs(u_rndm_graph, Components)
# file_2 is left open here: this view ends before the writes are finished.
file_2 = open("graph_rdm_undirected_connected_compo_sizes.txt", "w")
file_2.write("#----------------------------------\n")
# NOTE(review): "comp" is not defined in this view; this print is presumably
# the body of a loop over (component size, count) pairs that starts earlier.
print "size %d, number of components %d" % (comp.GetVal1(), comp.GetVal2())

# Largest weakly connected component of G.
MxWcc = snap.GetMxWcc(G)
print "\nmax wcc nodes %d, edges %d" % (MxWcc.GetNodes(), MxWcc.GetEdges())

# In-degree distribution: one (in-degree, number of nodes) pair per degree.
InDegCntV = snap.TIntPrV()
snap.GetInDegCnt(G, InDegCntV)
print "\n# of different in-degrees", InDegCntV.Len()
for item in InDegCntV:
    print "in-degree %d, number of nodes %d" % (item.GetVal1(), item.GetVal2())

# Out-degree distribution: one (out-degree, number of nodes) pair per degree.
OutDegCntV = snap.TIntPrV()
snap.GetOutDegCnt(G, OutDegCntV)
print "\n# of different out-degrees", OutDegCntV.Len()
for item in OutDegCntV:
    print "out-degree %d, number of nodes %d" % (item.GetVal1(), item.GetVal2())

# PageRank score per node id, stored in an int -> float hash table.
PRankH = snap.TIntFltH()
snap.GetPageRank(G, PRankH)
#for item in PRankH:
#    print item, PRankH[item]

# Keys ordered by decreasing PageRank; the first ten are reported.
slist = sorted(PRankH, key=lambda key: PRankH[key], reverse=True)
print "\ntop 10 experts by PageRank"
for item in slist[:10]:
    print "id %7s, pagerank %.6f" % (item, PRankH[item])
def outdegSNAP( graph ):
    """Return the out-degree distribution of graph as ``[degrees, counts]``.

    ``degrees[i]`` is an out-degree and ``counts[i]`` the number of nodes
    having it, in the order delivered by ``snap.GetOutDegCnt``.
    """
    pairs = snap.TIntPrV()
    snap.GetOutDegCnt(graph, pairs)
    degrees, counts = [], []
    for pair in pairs:
        degrees.append(pair.GetVal1())
        counts.append(pair.GetVal2())
    return [degrees, counts]
def intro():
    """Walk through basic snap operations, printing node and edge counts.

    Builds a small directed graph by hand, generates random, Forest Fire and
    Preferential Attachment graphs, traverses nodes and edges, saves and
    reloads a graph in binary (``test.graph``) and text (``test.txt``) form,
    and calls several graph statistics functions. The results of the
    statistics calls at the end are not printed or returned.
    """
    # create a graph PNGraph
    G1 = snap.TNGraph.New()
    G1.AddNode(1)
    G1.AddNode(5)
    G1.AddNode(32)
    G1.AddEdge(1, 5)
    G1.AddEdge(5, 1)
    G1.AddEdge(5, 32)
    print("G1: Nodes %d, Edges %d" % (G1.GetNodes(), G1.GetEdges()))

    # create a directed random graph on 100 nodes and 1k edges
    G2 = snap.GenRndGnm(snap.PNGraph, 100, 1000)
    print("G2: Nodes %d, Edges %d" % (G2.GetNodes(), G2.GetEdges()))
    # traverse the nodes
    for NI in G2.Nodes():
        print("node id %d with out-degree %d and in-degree %d" % (NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()))
    # traverse the edges
    for EI in G2.Edges():
        print("edge (%d, %d)" % (EI.GetSrcNId(), EI.GetDstNId()))
    # traverse the edges by nodes
    for NI in G2.Nodes():
        for Id in NI.GetOutEdges():
            print("edge (%d %d)" % (NI.GetId(), Id))

    # generate a network using Forest Fire model
    G3 = snap.GenForestFire(1000, 0.35, 0.35)
    print("G3: Nodes %d, Edges %d" % (G3.GetNodes(), G3.GetEdges()))
    # save and load binary
    FOut = snap.TFOut("test.graph")
    G3.Save(FOut)
    # flush before the same file is opened for reading below
    FOut.Flush()
    FIn = snap.TFIn("test.graph")
    G4 = snap.TNGraph.Load(FIn)
    print("G4: Nodes %d, Edges %d" % (G4.GetNodes(), G4.GetEdges()))
    # save and load from a text file
    snap.SaveEdgeList(G4, "test.txt", "Save as tab-separated list of edges")
    G5 = snap.LoadEdgeList(snap.PNGraph, "test.txt", 0, 1)
    print("G5: Nodes %d, Edges %d" % (G5.GetNodes(), G5.GetEdges()))

    # generate a network using Forest Fire model
    G6 = snap.GenForestFire(1000, 0.35, 0.35)
    print("G6: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))
    # convert to undirected graph
    G7 = snap.ConvertGraph(snap.PUNGraph, G6)
    print("G7: Nodes %d, Edges %d" % (G7.GetNodes(), G7.GetEdges()))
    # get largest weakly connected component of G6
    WccG = snap.GetMxWcc(G6)
    # get a subgraph induced on nodes {0,1,2,3,4}
    SubG = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4))
    # get 3-core of G6
    Core3 = snap.GetKCore(G6, 3)
    # delete nodes of out degree 10 and in degree 5
    snap.DelDegKNodes(G6, 10, 5)
    print("G6a: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))

    # generate a Preferential Attachment graph on 1000 nodes and node out degree of 3
    G8 = snap.GenPrefAttach(1000, 3)
    print("G8: Nodes %d, Edges %d" % (G8.GetNodes(), G8.GetEdges()))
    # vector of pairs of integers (size, count)
    CntV = snap.TIntPrV()
    # get distribution of connected components (component size, count)
    snap.GetWccSzCnt(G8, CntV)
    # get degree distribution pairs (degree, count); the same vector is passed again
    snap.GetOutDegCnt(G8, CntV)
    # vector of floats
    EigV = snap.TFltV()
    # get first eigenvector of graph adjacency matrix
    snap.GetEigVec(G8, EigV)
    # get diameter of G8
    snap.GetBfsFullDiam(G8, 100)
    # count the number of triads in G8, get the clustering coefficient of G8
    snap.GetTriads(G8)
    snap.GetClustCf(G8)
# Edge-list file loaded by the script below.
DATA_PATH = './Wiki-Vote.txt'

if __name__ == '__main__':
    # Build Wiki Graph
    G1 = snap.LoadEdgeList(snap.PNGraph, DATA_PATH, 0, 1)
    # Use Snap.py's own plot tool; the plot is not displayed.
    snap.PlotOutDegDistr(G1, 'Wiki', 'Wiki')
    # So the distribution is collected and drawn manually.
    DegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(G1, DegToCntV)
    out_deg = []
    deg_cnt = []
    for item in DegToCntV:
        deg_cnt.append(item.GetVal2())
        out_deg.append(item.GetVal1())
    out_deg_dis = pd.DataFrame({'Out_Degree_Value': out_deg, "Out_Degree_Cnt": deg_cnt})
    # NOTE(review): drops the row labelled 0, i.e. the first pair returned -
    # presumably the out-degree-0 entry; confirm that the first pair always
    # has degree 0 for this data set.
    out_deg_dis.drop(index=0, inplace=True)
    # print(out_deg_dis.head(10))
    # print(out_deg_dis.shape)
    # As polyfit and poly1d do not work, try linear regression to get the coefficient and intercept
# convert to undirected graph G7 = snap.ConvertGraph(snap.PUNGraph, G6) WccG = snap.GetMxWcc(G6) # get a subgraph induced on nodes {0,1,2,3,4,5} SubG = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4)) # get 3-core of G Core3 = snap.GetKCore(G6, 3) # delete nodes of out degree 10 and in degree 5 snap.DelDegKNodes(G6, 10, 5) # %% # stats # generate a Preferential Attachment graph on 1000 nodes and node out degree of 3 G8 = snap.GenPrefAttach(1000, 3) # vector of pairs of integers (size, count) CntV = snap.TIntPrV() # get distribution of connected components (component size, count) snap.GetWccSzCnt(G8, CntV) # get degree distribution pairs (degree, count) snap.GetOutDegCnt(G8, CntV) # vector of floats EigV = snap.TFltV() # get first eigenvector of graph adjacency matrix snap.GetEigVec(G8, EigV) # get diameter of G8 snap.GetBfsFullDiam(G8, 100) # count the number of triads in G8, get the clustering coefficient of G8 snap.GetTriads(G8) snap.GetClustCf(G8) # %%
# In-degree part. NOTE(review): X, Y and DegToCntV are not initialised in this
# view; presumably they are set up just before, as in the "Out" part below.
snap.GetInDegCnt(graph, DegToCntV)
for item in DegToCntV:
    Y.append(item.GetVal2())
    X.append(item.GetVal1())
# Need proportion: divide each count by the sum of all counts
total = float(sum(Y))
Y = [y / total for y in Y]
# Now plot it
plt.loglog(X, Y, color = 'r', label = 'GitHub User-PR Network - In Degree')

# Out
X, Y = [], []
DegToCntV = snap.TIntPrV()
snap.GetOutDegCnt(graph, DegToCntV)
for item in DegToCntV:
    Y.append(item.GetVal2())
    X.append(item.GetVal1())
# Need proportion: divide each count by the sum of all counts
total = float(sum(Y))
Y = [y / total for y in Y]
# Now plot it
plt.loglog(X, Y, color = 'y', label = 'GitHub User-PR Network - Out Degree')

# All plotting
plt.xlabel('Node Degree (log)')
plt.ylabel('Proportion of Nodes with a Given Degree (log)')
plt.title('Degree Distribution of GitHub User-PR Network')