def Q2_1_Experiments():
    """Print largest-SCC share and forward/backward BFS reach for chosen nodes.

    Runs the same experiment on the email and Epinions graphs returned by
    loadNetworks(): first the percentage of nodes in the largest SCC, then
    the values returned by GetForwardBackwardProp() for hand-picked node IDs.
    Both helpers are defined outside this function.  The figures quoted in
    the comments are the values recorded by the original author.
    """
    def report_reach(graph, node_id):
        # Print the forward/backward values reported for one start node.
        forward, backward = GetForwardBackwardProp(graph, node_id)
        print("Forward %s and Backward %s for ID: %s." %
              (forward, backward, node_id))

    # Load graphs.
    epinions, email = loadNetworks()

    # ### Email Model
    # Size of the largest SCC, as a percentage of all nodes.
    email_scc = snap.GetMxScc(email)
    email_scc_pct = 100 * float(email_scc.GetNodes()) / email.GetNodes()
    print("Email SCC: %s." % (email_scc_pct))

    # Proposal: 189587 is in SCC, which gives
    #   SCC + OUT = 19.6456446492%
    #   OUT       = 6.7492666299%
    #   SCC + IN  = 69.8402045141
    #   OTHER     = 23.410528856
    report_reach(email, 189587)

    # Proposal: 675 is in OUT, which gives
    #   SCC + IN + \epsilon = 69.8454832701%
    report_reach(email, 675)

    # One more node to get a better picture.
    # Proposal: 0 is in IN, which gives
    #   SCC + OUT + \epsilon = 19.6460217032%
    report_reach(email, 0)

    # ### Epinions Model
    # Size of the largest SCC, as a percentage of all nodes.
    epinions_scc = snap.GetMxScc(epinions)
    epinions_scc_pct = (100 * float(epinions_scc.GetNodes()) /
                        epinions.GetNodes())
    print("Epinions SCC: %s." % (epinions_scc_pct))

    # Proposal: 1952 is in the IN region.
    #   SCC + OUT + \epsilon = 62.8329313776%
    report_reach(epinions, 1952)

    # Proposal: 9809 is in the OUT region.
    #   SCC + IN + \epsilon = 74.4079389554%
    report_reach(epinions, 9809)

    # Proposal: 193 is in the SCC region.
    #   SCC + OUT = 62.8316134899%
    #   OUT       = 20.365318467599997%
    #   SCC + IN  = 74.4066210678
    #   REST      = 5.228060464599995%
    report_reach(epinions, 193)
def bowtie_components(graph, name):
    """Print and return the sizes of the bow-tie regions of a directed graph.

    A random node of the largest SCC is used as BFS root.  The BFS that
    follows in-links gives SCC + IN, the BFS that follows out-links gives
    SCC + OUT, and whatever remains inside the largest WCC is counted as
    TENDRILS + TUBES.

    Args:
        graph: snap directed graph to analyse.
        name: label used in the printed header.

    Returns:
        dict mapping "a. SCC", "b. IN", "c. OUT", "d. TENDRILS + TUBES" and
        "e. DISCONNECTED" to node counts.
    """
    total = graph.GetNodes()
    largest_scc = snap.GetMxScc(graph)
    root = largest_scc.GetRndNId()

    # Everything outside the largest weakly connected component.
    disc = total - snap.GetMxWcc(graph).GetNodes()
    scc = largest_scc.GetNodes()

    reach_in = snap.GetBfsTree(graph, root, False, True)   # follow in-links
    reach_out = snap.GetBfsTree(graph, root, True, False)  # follow out-links
    in1 = reach_in.GetNodes() - scc
    out = reach_out.GetNodes() - scc
    tt = total - disc - scc - in1 - out

    results = {
        "a. SCC": scc,
        "b. IN": in1,
        "c. OUT": out,
        "d. TENDRILS + TUBES": tt,
        "e. DISCONNECTED": disc,
    }

    # Single-argument print(...) produces the same text on Python 2 and 3.
    print('Total nodes in {} network: {}'.format(name, total))
    print('DISCONNECTED: {}'.format(disc))
    print('SCC: {}'.format(scc))
    print('IN: {}'.format(in1))
    print('OUT: {}'.format(out))
    print('TENDRILS + TUBES: {}'.format(tt))
    return results
def get_shortest_path(file_path, output_path):
    """Write the shortest-path-length histogram of the largest SCC to a CSV.

    For every node of the largest SCC a directed shortest-path search is run
    inside that SCC, and every distance it reports is tallied (this includes
    whatever entry snap reports for the source node itself).  Progress is
    printed as "done/total" after each source node.

    Args:
        file_path: passed to load_graph() (defined elsewhere), which returns
            the graph plus a second value that is not used here.
        output_path: destination CSV with columns ``dist`` and ``freq``,
            sorted by ascending ``dist``.
    """
    Graph, _ = load_graph(file_path)
    MxScc = snap.GetMxScc(Graph)
    tot = MxScc.GetNodes()

    path_distr = {}
    for cnt, NI in enumerate(MxScc.Nodes(), 1):
        NIdToDistH = snap.TIntH()
        # Only the filled-in distance table is needed, not the return value.
        snap.GetShortPath(MxScc, NI.GetId(), NIdToDistH, True)
        for ID in NIdToDistH:
            dist = NIdToDistH[ID]
            path_distr[dist] = path_distr.get(dist, 0) + 1
        print('%d/%d' % (cnt, tot))

    # Build the frame already sorted: DataFrame.sort() no longer exists in
    # current pandas, and passing explicit columns keeps an empty histogram
    # from raising KeyError.
    dataset = pd.DataFrame(sorted(path_distr.items()),
                           columns=['dist', 'freq'])
    dataset.to_csv(output_path, index=False, encoding='utf-8')
def SizeOfBowtieRegions(Graph, sccNodeID):
    '''
    Measures the regions of a graph with the bow-tie structure described in
    http://snap.stanford.edu/class/cs224w-readings/broder00bowtie.pdf

    sccNodeID must be a node of the largest SCC (and hence of the largest
    WCC); both conditions are asserted.

    returns: tuple of sizes (SCC, IN, OUT, TENDRILS, DISCONNECTED), where
    TENDRILS is whatever is left of the largest WCC once SCC, IN and OUT
    have been removed.
    '''
    graphSize = Graph.GetNodes()

    largestWcc = snap.GetMxWcc(Graph)
    assert largestWcc.IsNode(sccNodeID)
    wccSize = largestWcc.GetNodes()
    disconnectedSize = graphSize - wccSize

    largestScc = snap.GetMxScc(Graph)
    # Sanity check the input.
    assert largestScc.IsNode(sccNodeID)
    sccSize = largestScc.GetNodes()

    # Forward reach is SCC + OUT, backward reach is SCC + IN.
    forwardReach = snap.GetBfsTree(Graph, sccNodeID, True, False)
    backwardReach = snap.GetBfsTree(Graph, sccNodeID, False, True)
    outSize = forwardReach.GetNodes() - sccSize
    inSize = backwardReach.GetNodes() - sccSize

    tendrilSize = wccSize - (inSize + outSize + sccSize)

    sizes = (sccSize, inSize, outSize, tendrilSize, disconnectedSize)
    assert sum(sizes) == Graph.GetNodes()
    return sizes
def q2_1_aux(name, id):
    """Print BFS reach sizes for one node and classify it in the bow-tie.

    The node is classified by whether a random node of the largest SCC is
    found in its forward BFS tree (the node reaches the SCC) and in its
    backward BFS tree (the SCC reaches the node).

    Args:
        name: dataset name passed to load_graph() (defined elsewhere).
        id: ID of the node to examine.
    """
    G = load_graph(name)

    # Each tree is computed once and reused for both the membership test
    # and the size report.
    OutTree = snap.GetBfsTree(G, id, True, False)
    InTree = snap.GetBfsTree(G, id, False, True)

    sccNodeId = snap.GetMxScc(G).GetRndNId()
    reachesScc = OutTree.IsNode(sccNodeId)
    reachedFromScc = InTree.IsNode(sccNodeId)

    print("graph: {}".format(name))
    print("nodeId {}".format(id))
    print("sizegraph {}".format(G.GetNodes()))
    print("sizeOutTree {}".format(OutTree.GetNodes()))
    print("sizeInTree {}".format(InTree.GetNodes()))

    if reachesScc and reachedFromScc:
        print("node in SCC")
    elif reachesScc:
        print("node in IN")
    elif reachedFromScc:
        print("node in OUT")
    else:
        # Neither direction touches the SCC, so the node is not in OUT.
        print("node in TENDRILS/TUBES or DISCONNECTED")
def q1_3_grpah(Graph):
    """Print the bow-tie region sizes of a directed graph.

    DISCONNECTED is everything outside the largest WCC.  OUT and IN are
    estimated by running forward and backward BFS from up to 100 randomly
    sampled nodes of the largest SCC and keeping the largest reach seen.

    Args:
        Graph: snap directed graph.
    """
    n_nodes = Graph.GetNodes()
    MxWcc = snap.GetMxWcc(Graph)
    MxScc = snap.GetMxScc(Graph)
    n_MxWcc = MxWcc.GetNodes()
    n_MxScc = MxScc.GetNodes()
    print(" TOTAL : ", n_nodes)
    print(" DISCONNECTED : ", n_nodes - n_MxWcc)
    print(" SCC : ", n_MxScc)

    scc_ids = [NI.GetId() for NI in MxScc.Nodes()]
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample at the SCC size.
    num_test = min(100, len(scc_ids))

    # A BFS started inside the SCC reaches at least the SCC itself, so the
    # SCC size is a safe starting value for the running maxima.
    max_out = n_MxScc
    max_in = n_MxScc
    for NodeId in random.sample(scc_ids, num_test):
        BfsTreeOut = snap.GetBfsTree(Graph, NodeId, True, False)
        BfsTreeIn = snap.GetBfsTree(Graph, NodeId, False, True)
        max_out = max(max_out, BfsTreeOut.GetNodes())  # roughly SCC + OUT
        max_in = max(max_in, BfsTreeIn.GetNodes())     # roughly SCC + IN

    num_out = max_out - n_MxScc
    num_in = max_in - n_MxScc
    print(" OUT : ", num_out)
    print(" IN : ", num_in)
    num_tendrils = n_MxWcc - n_MxScc - num_out - num_in
    print(" TENDRILS+TUBES : ", num_tendrils)
def get_connected_component(graph):
    """Return the largest connected component of ``graph``, renumbered.

    Directed graphs (snap.PNGraph) yield their largest strongly connected
    component, undirected graphs (snap.PUNGraph) their largest weakly
    connected component.  The result is passed through snap.ConvertGraph
    with the renumbering flag set, which per the original comments renumbers
    the nodes from 0 to size-1.

    Raises:
        NotAGraphError: if ``graph`` is neither of the two supported types.
    """
    if isinstance(graph, snap.PNGraph):
        graph_type, largest_component = snap.PNGraph, snap.GetMxScc
    elif isinstance(graph, snap.PUNGraph):
        graph_type, largest_component = snap.PUNGraph, snap.GetMxWcc
    else:
        raise NotAGraphError(graph)

    return snap.ConvertGraph(graph_type, largest_component(graph), True)
def processNetwork(Graph, id_to_groups):
    """Write a plain-text structural report of ``Graph`` to a fixed file.

    The report contains, in order: edge/node counts of the whole graph, the
    largest WCC (counts, node IDs, then the ``id_to_groups`` entry for each
    node), every WCC with at least 10 nodes, the same two sections for SCCs,
    and finally the clustering coefficient, triad count and the modularity
    of the full node set.

    Args:
        Graph: snap graph to describe.
        id_to_groups: mapping indexed by node ID; each value is written with
            ``%s``.
    """
    min_listed_size = 10  # smaller components are left out of the listings

    def write_largest_component(out, header, component):
        # Counts, then node IDs, then the group label of every node.
        out.write(header)
        out.write('Edges: %f ' % component.GetEdges())
        out.write('Nodes: %f \n' % component.GetNodes())
        out.write('Node List: ')
        for member in component.Nodes():
            out.write('%d, ' % member.GetId())
        out.write('\n')
        for member in component.Nodes():
            out.write('%s, ' % id_to_groups[member.GetId()])

    def write_component_listing(out, header, label_format, collect):
        # List the node IDs of every sufficiently large component.
        out.write(header)
        found = snap.TCnComV()
        collect(Graph, found)
        for index, component in enumerate(found):
            if component.Len() >= min_listed_size:
                out.write(label_format % index)
                for node_id in component:
                    out.write('%d, ' % node_id)

    with open("../../data/fastinf_graph_noweights_features.txt", "w+") as f:
        f.write("RELATED GROUPS GRAPH:\n")
        f.write('Edges: %d\n' % Graph.GetEdges())
        f.write('Nodes: %d\n\n' % Graph.GetNodes())

        largest_wcc = snap.GetMxWcc(Graph)
        write_largest_component(f, "MAX WCC:\n", largest_wcc)
        write_component_listing(f, "\n\nALL WCCs:", '\nWcc%d: ',
                                snap.GetWccs)

        largest_scc = snap.GetMxScc(Graph)
        write_largest_component(f, "\n\nMAX SCC:\n", largest_scc)
        write_component_listing(f, "\n\nALL SCCs:", '\nScc%d: ',
                                snap.GetSccs)

        f.write('\n\nCLUSTERING AND COMMUNITIES:\n')
        f.write('Clustering coefficient: %f\n' % snap.GetClustCf(Graph, -1))
        f.write('Num Triads: %d\n' % snap.GetTriads(Graph, -1))

        # Modularity is evaluated with all nodes in one community.
        all_node_ids = snap.TIntV()
        for member in Graph.Nodes():
            all_node_ids.Add(member.GetId())
        f.write('Modularity: %f' % snap.GetModularity(Graph, all_node_ids))
def get_densification(df):
    """Collect per-year size and diameter statistics.

    For each distinct value of ``df['year']`` (ascending) the matching rows
    are turned into a graph by get_graph() (defined elsewhere) and three
    numbers are recorded: node count, edge count, and the ANF effective
    diameter of the graph's largest SCC.

    Returns:
        (node_counts, edge_counts, anf_diameters, years): four parallel
        lists, one entry per year.
    """
    years = sorted(df['year'].unique())

    node_counts, edge_counts, anf_diameters = [], [], []
    for current_year in years:
        yearly_rows = df[df['year'] == current_year]
        yearly_graph = get_graph(yearly_rows)

        node_counts.append(yearly_graph.GetNodes())
        edge_counts.append(yearly_graph.GetEdges())

        largest_scc = snap.GetMxScc(yearly_graph)
        anf_diameters.append(snap.GetAnfEffDiam(largest_scc))

    return node_counts, edge_counts, anf_diameters, years
def get_labeled_LSCC_for_paths(file_path, output_path_LSCC, output_path_hash):
    """Dump the largest SCC as an edge list plus a node-ID lookup file.

    Args:
        file_path: passed to load_graph() (defined elsewhere), which returns
            the graph and a hash ``H`` queried with ``H.GetKey(node_id)``.
        output_path_LSCC: receives one "src dst" line per out-edge of every
            node in the largest SCC.
        output_path_hash: receives one "id key" line per node.

    Both output files are overwritten.  The node ID is printed after each
    node is written, as a progress indicator.
    """
    Graph, H = load_graph(file_path)
    MxScc = snap.GetMxScc(Graph)

    # Opening in 'w' mode truncates each file, and the with-block makes sure
    # both handles are closed (they were previously left open).
    with open(output_path_LSCC, 'w') as f_graph, \
            open(output_path_hash, 'w') as f_hash:
        # Kept so stdout matches the original truncate-then-append version.
        print('-----clear')
        print('-----clear')

        for NI in MxScc.Nodes():
            ID = NI.GetId()
            f_hash.write('%d %s\n' % (ID, H.GetKey(ID)))
            for des in NI.GetOutEdges():
                f_graph.write('%d %d\n' % (ID, des))
            print(ID)
def max_scc_evolution(graphs, name, time_units, verbose=False, duration=None):
    """Plot how the node count of the largest SCC changes across ``graphs``.

    Args:
        graphs: iterable of snap graphs, one per time step.
        name: label used in the plot title and the output file name.
        time_units: unit shown on the x-axis label.
        verbose: when true, also show the figure after saving it.
        duration: optional (first, last) pair; when given, the x-axis runs
            from ``first`` to ``last`` inclusive instead of 0..len-1.
    """
    scc_sizes = [snap.GetMxScc(snapshot).GetNodes() for snapshot in graphs]

    if duration is None:
        time_axis = range(len(scc_sizes))
    else:
        time_axis = range(duration[0], duration[1] + 1)

    plt.plot(time_axis, scc_sizes)
    plt.xlabel("Time in {}".format(time_units))
    plt.ylabel("Number of nodes in largest SCC")
    plt.title("SCC nodes evolution of {} graphs".format(name))
    plt.savefig("SCCnodes_time_{}".format(name))

    if verbose:
        plt.show()
def q2_3_util(dataset_name):
    """Print the bow-tie region sizes of the named dataset.

    Forward and backward BFS trees are grown from a random node of the
    largest SCC.  Their node counts give SCC + OUT and SCC + IN; what is
    left of the largest WCC after removing SCC, IN and OUT is reported as
    TENDRILS + TUBES.

    Args:
        dataset_name: passed to load_graph() (defined elsewhere).
    """
    # G = load_graph("email")
    G = load_graph(dataset_name)

    total_size = G.GetNodes()
    wcc_size = snap.GetMxWcc(G).GetNodes()
    disconnected_size = total_size - wcc_size
    print('Total size:  {}'.format(total_size))
    print('WCC size:  {}'.format(wcc_size))
    print('DISCONNECTED:  {}'.format(disconnected_size))

    Rnd = snap.TRnd(42)
    # NOTE(review): Randomize() presumably reseeds the generator, which would
    # cancel the fixed seed above; confirm which behaviour is wanted.
    Rnd.Randomize()

    MxScc = snap.GetMxScc(G)
    scc_size = MxScc.GetNodes()

    number_of_trials = 1
    scc_plus_out = 0
    scc_plus_in = 0
    for _ in range(number_of_trials):
        NId = MxScc.GetRndNId(Rnd)
        # Count tree nodes directly: collecting edge destinations misses the
        # BFS root, which has no incoming tree edge.
        out_tree = snap.GetBfsTree(G, NId, True, False)
        in_tree = snap.GetBfsTree(G, NId, False, True)
        scc_plus_out = max(scc_plus_out, out_tree.GetNodes())
        scc_plus_in = max(scc_plus_in, in_tree.GetNodes())

    out_size = scc_plus_out - scc_size
    in_size = scc_plus_in - scc_size
    # The SCC is part of the WCC too, so it has to be subtracted as well.
    tendrils_plus_tubes = wcc_size - scc_size - in_size - out_size

    print('IN:  {}'.format(in_size))
    print('scc_size {}'.format(scc_size))
    print('scc + out:  {}'.format(scc_plus_out))
    print('OUT:  {}'.format(out_size))
    print('scc + in:  {}'.format(scc_plus_in))
    print('TENDRILS + TUBES {}'.format(tendrils_plus_tubes))
    print('------------------')
def analyze_network(
    k=1000, fanout=1, fanout_samples=1, graph_in_path='bad_actors.graph'
):
    """
    @params: [k (int), fanout, fanout_samples, graph_in_path (str)]
    @returns: None

    Loads a snap.TNEANet from 'graph_in_path', prints its basic information
    and the node count of its largest strongly connected component, then
    hands k, fanout and fanout_samples to visualize_k_random_users()
    (defined elsewhere; according to the previous docstring it samples
    k edges and draws them with networkx).
    """
    in_stream = snap.TFIn(graph_in_path)
    network = snap.TNEANet.Load(in_stream)

    snap.PrintInfo(network, 'Basic Graph Information', '/dev/stdout', False)

    largest_scc_size = snap.GetMxScc(network).GetNodes()
    print('Nodes in largest strongly-connected subcomponent: %d' %
          largest_scc_size)

    visualize_k_random_users(k, fanout, fanout_samples, network)
def q2_3_aux(name):
    """Print the bow-tie region sizes for the "email" or Epinions dataset.

    SCC + IN and SCC + OUT are taken from the last entry of the module-level
    reachability vectors (``emIn``/``emOut`` for "email", ``epIn``/``epOut``
    for anything else), which are assumed to be filled in before this is
    called.

    Args:
        name: dataset name passed to load_graph() (defined elsewhere).
    """
    G = load_graph(name)
    SCC = snap.GetMxScc(G).GetNodes()
    wcc = snap.GetMxWcc(G).GetNodes()

    if name == "email":
        in_explosion, out_explosion = emIn, emOut
    else:
        in_explosion, out_explosion = epIn, epOut

    IN = in_explosion[-1] - SCC
    OUT = out_explosion[-1] - SCC
    DISCONNECTED = G.GetNodes() - wcc
    TENDRILS_AND_TUBES = wcc - IN - OUT - SCC

    # One formatted string prints the same text on Python 2 and Python 3.
    print("{} DISCONNECTED: {} IN: {} OUT: {} SCC: {} "
          "TENDRILS + TUBES: {}".format(
              name, DISCONNECTED, IN, OUT, SCC, TENDRILS_AND_TUBES))
def main():
    """Compute network features for every assignee graph and save them.

    Graphs come from load_networks() (defined elsewhere).  For each graph
    with at least one node the features are stored in ``AGraph.metadata``
    and the metadata is written to ``<folder><company_name>.json``.
    Finally the number of loaded graphs is printed.
    """
    citation = False
    folder = '../data/citation_networks/' if citation else '../data/networks/'
    AssigneeGraphs = load_networks(folder)

    print("Generating features...")
    for AGraph in tqdm(AssigneeGraphs):
        # Calculate network features
        Graph = AGraph.Graph
        node_count = Graph.GetNodes()
        if node_count <= 0:
            print("0 nodes {}".format(AGraph.company_name))
            continue

        edge_count = Graph.GetEdges()
        cc = snap.GetClustCf(Graph)

        Components = snap.TCnComV()
        snap.GetSccs(Graph, Components)
        num_sccs = len(Components)

        MxScc = snap.GetMxScc(Graph)
        max_scc_proportion = float(MxScc.GetNodes()) / node_count
        avg_patents_per_inventor = (
            float(AGraph.metadata['number_of_patents']) / node_count)
        modularity = get_modularity(Graph)

        features = [
            ('node_count', node_count),
            ('edge_count', edge_count),
            ('clustering_cf', cc),
            ('num_sccs', num_sccs),
            ('max_scc_proportion', max_scc_proportion),
            ('avg_patents_per_inventor', avg_patents_per_inventor),
            ('modularity', modularity),
        ]
        for key, value in features:
            AGraph.metadata[key] = value

        with open(folder + AGraph.company_name + '.json', 'w') as fp:
            json.dump(AGraph.metadata, fp, sort_keys=True, indent=4)

    print(len(AssigneeGraphs))
def calc_net_stats(folder):
    """Return a NetworkStats record for every non-empty assignee graph.

    Args:
        folder: directory handed to load_networks() (defined elsewhere) to
            obtain the assignee graphs.

    Returns:
        list of NetworkStats, one per graph that has at least one node.
    """
    # The graphs were previously read from an undefined name while `folder`
    # went unused; load them here the same way main() does.
    AssigneeGraphs = load_networks(folder)

    stats = []
    print("Loading features...")
    for AGraph in tqdm(AssigneeGraphs):
        # Calculate network features
        Graph = AGraph.Graph
        node_count = Graph.GetNodes()
        if node_count <= 0:
            continue

        edge_count = Graph.GetEdges()
        cc = snap.GetClustCf(Graph)

        Components = snap.TCnComV()
        snap.GetSccs(Graph, Components)
        num_sccs = len(Components)

        MxScc = snap.GetMxScc(Graph)
        max_scc_proportion = float(MxScc.GetNodes()) / node_count
        avg_patents_per_inventor = (
            float(AGraph.metadata['number_of_patents']) / node_count)
        modularity = get_modularity(Graph)

        stats.append(NetworkStats(
            node_count=node_count,
            edge_count=edge_count,
            clustering_cf=cc,
            num_sccs=num_sccs,
            max_scc_proportion=max_scc_proportion,
            avg_patents_per_inventor=avg_patents_per_inventor,
            modularity=modularity,
        ))
    return stats
def analyze_graph(G):
    """Print the bow-tie region sizes of directed graph ``G``.

    A random node of the largest SCC is the BFS root.  The out-link BFS
    gives SCC + OUT, the in-link BFS gives SCC + IN, nodes outside the
    largest WCC are DISCONNECTED, and the remainder is reported as
    tendrils/tubes.
    """
    WCC = snap.GetMxWcc(G)
    SCC = snap.GetMxScc(G)
    root_id = SCC.GetRndNId()  # renamed from `id` to stop shadowing the builtin

    out_tree = snap.GetBfsTree(G, root_id, True, False)
    in_tree = snap.GetBfsTree(G, root_id, False, True)

    G_size = G.GetNodes()
    SCC_size = SCC.GetNodes()
    WCC_size = WCC.GetNodes()
    DISCONNECTED_size = G_size - WCC_size
    in_size = in_tree.GetNodes() - SCC_size
    out_size = out_tree.GetNodes() - SCC_size
    Tendril_size = (G_size - SCC_size - DISCONNECTED_size
                    - in_size - out_size)

    print('Total Graph Size: %d' % G_size)
    print('SCC Size: %d' % SCC_size)
    print('WCC Size: %d' % WCC_size)
    print('IN Size: %d' % in_size)
    print('OUT Size: %d' % out_size)
    print('DISCONNECTED Size: %d' % DISCONNECTED_size)
    print('Tendril tube size (remaining): %d' % Tendril_size)
    # Blank separator line; a bare print() would print "()" on Python 2.
    print('')
def per_graph(graph, name):
    """Print the bow-tie region sizes of ``graph`` under the label ``name``.

    OUT and IN are derived from the forward/backward BFS reach averaged over
    200 random start nodes of the largest SCC, so they are printed as floats.
    """
    mxWcc = snap.GetMxWcc(graph)
    mxScc = snap.GetMxScc(graph)
    wcc_size = mxWcc.GetNodes()
    scc_size = mxScc.GetNodes()

    print('')
    print('Size analysis on {}'.format(name))
    print('Disconnected size = {}'.format(graph.GetNodes() - wcc_size))
    print('SCC size = {}'.format(scc_size))

    trials = 200
    total_reached_out = 0
    total_reached_in = 0
    for _ in range(trials):
        nodeId = mxScc.GetRndNId()
        total_reached_out += snap.GetBfsTree(
            graph, nodeId, True, False).GetNodes()
        total_reached_in += snap.GetBfsTree(
            graph, nodeId, False, True).GetNodes()

    scc_out = float(total_reached_out) / trials
    scc_in = float(total_reached_in) / trials
    out_sz = scc_out - scc_size
    in_sz = scc_in - scc_size

    print('OUT size = {}'.format(out_sz))
    print('IN size = {}'.format(in_sz))
    print('Tendrils/Tubes size = {}'.format(
        wcc_size - scc_size - out_sz - in_sz))
def q2_1():
    '''
    Run the inward and outward BFS trees for the chosen nodes so one can
    reason about whether they are in SCC, IN or OUT.  Uses the SNAP function
    GetBfsTree().
    '''
    ##########################################################################
    # Outward and inward BFS trees from node 2018 of the email graph.
    G = load_graph("email")
    _q2_1_print_reach(G, 2018)
    print('SCC size: {}'.format(snap.GetMxScc(G).GetNodes()))
    print('Relative size of SCC in Directed Graph: {}'.format(
        snap.GetMxSccSz(G)))
    ##########################################################################

    ##########################################################################
    # Outward and inward BFS trees from node 224 of the Epinions graph.
    G = load_graph("epinions")
    _q2_1_print_reach(G, 224)
    print('Relative size of SCC in Directed Graph: {}'.format(
        snap.GetMxSccSz(G)))
    ##########################################################################

    print('2.1: Done!\n')


def _q2_1_print_reach(G, node_id):
    """Print the backward and forward BFS reach of ``node_id`` in ``G``."""
    # Node counts include the start node; counting distinct edge
    # destinations would leave it out because the root has no incoming
    # tree edge.
    outward_size = snap.GetBfsTree(G, node_id, True, False).GetNodes()
    inward_size = snap.GetBfsTree(G, node_id, False, True).GetNodes()

    print('inward_set {}'.format(inward_size))
    print('outward_set {}'.format(outward_size))
    # NOTE(review): labelled "G size" but reports the edge count; confirm
    # whether the node count was intended.
    print('G size {}'.format(G.GetEdges()))
l.append((src, dst)) l.sort() #for item in l: # print("G7\t%d\t%d" % (item[0], item[1])) WccG6 = snap.GetMxWcc(G6) print("type(WccG6) %s" % (type(WccG6))) print("WccG6 nodes %d, edges %d" % (WccG6.GetNodes(), WccG6.GetEdges())) WccG7 = snap.GetMxWcc(G7) print("type(WccG7) %s" % (type(WccG7))) print("WccG7 nodes %d, edges %d" % (WccG7.GetNodes(), WccG7.GetEdges())) SccG6 = snap.GetMxScc(G6) print("type(SccG6) %s" % (type(SccG6))) print("SccG6 nodes %d, edges %d" % (SccG6.GetNodes(), SccG6.GetEdges())) SccG7 = snap.GetMxScc(G7) print("type(SccG7) %s" % (type(SccG7))) print("SccG7 nodes %d, edges %d" % (SccG7.GetNodes(), SccG7.GetEdges())) SubG6 = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4)) print("type(SubG6) %s" % (type(SubG6))) print("SubG6 nodes %d, edges %d" % (SubG6.GetNodes(), SubG6.GetEdges())) for EI in SubG6.Edges(): print("edge (%d, %d)" % (EI.GetSrcNId(), EI.GetDstNId())) Core3G6 = snap.GetKCore(G6, 3) print("type(Core3G6) %s" % (type(Core3G6)))
nx.draw(nxG, with_labels=True) fig.savefig(str(dir) + "\\role" + str(role) + "node" + str(id) + id_to_word[id].name() + name + ".png") plt.close(fig) G0 = generate_word_graph(True, False, False, 0) snap.SaveEdgeList(G0, "G0.txt", "") G1 = generate_word_graph(True, False, False, 1) snap.SaveEdgeList(G1, "G1.txt", "") G2 = generate_word_graph(True, False, False, 2) snap.SaveEdgeList(G2, "G2.txt", "") print(meme) PolyG, Polyid, Polysynset, _,_,_ = generate_meaning_graph(False, True, False) print(snap.GetMxScc(PolyG).GetNodes()) HypG, Hypid, Hypsynset, _,_,_ = generate_meaning_graph(True, False, False) HoloG, Holoid, Holosynset, _,_,_ = generate_meaning_graph(False, False, True) for k in Polyid: if Polyid[k] != Hypid[k]: print("oh no") W = extract_roles(create_node_vectors([HypG, PolyG, HoloG], 3), 12) print(W.shape) roles = [] counts = dict() nodes = dict() for i in range(W.shape[0]): role = np.argmax(W[i]) roles.append(role)
import snap
import sys

# Simple script to re-index to 0-indexed graph.
#
# Usage: <script> EDGE_LIST [1]
# The optional second argument "1" loads the edge list as an undirected
# graph; otherwise it is loaded as directed.  The largest SCC is renumbered
# and written back over the input file.
graph = sys.argv[1]

# Command-line arguments are strings, so compare against '1' (comparing
# with the integer 1 is never true).
if len(sys.argv) > 2 and sys.argv[2] == '1':
    Gin = snap.LoadEdgeList(snap.PUNGraph, graph)
else:
    Gin = snap.LoadEdgeList(snap.PNGraph, graph)

MxScc = snap.GetMxScc(Gin)
Gout = snap.ConvertGraph(snap.PNGraph, MxScc, True)

print('Number of nodes:  {}'.format(Gout.GetNodes()))
print('Number of edges:  {}'.format(Gout.GetEdges()))

snap.SaveEdgeList(Gout, graph)
backwardTree = snap.GetBfsTree(Graph, nodeID, False, True) return (float(forwardTree.GetNodes()) / Graph.GetNodes(), float(backwardTree.GetNodes()) / Graph.GetNodes()) # In[104]: # Load graphs. epinions, email = loadNetworks() # ### Email Model # In[119]: # Let's find the size of the largest SCC. emailSCC = snap.GetMxScc(email) print("Email SCC: %s." % (100 * float(emailSCC.GetNodes()) / email.GetNodes())) # In[120]: # The proposal is that 189587 is in SCC, so we have: # SCC + OUT = 19.6456446492% # OUT = 6.7492666299% # SCC + IN = 69.8402045141% # IN = 56.9438264948% # OTHER: 23.410528856% fp, bp = GetForwardBackwardProp(email, 189587) print("Forward %s and Backward %s for ID: %s." % (fp, bp, 189587)) # In[121]:
import snap  # needed for snap.PlotShortPathDistr / snap.TFIn / snap.GetMxScc
from snap import TUNGraph
import time
from datetime import timedelta
import sys


def calculate_shortest_path_lengths_distribution(graph, hashtag):
    """Plot the shortest-path-length distribution of ``graph`` and time it.

    Args:
        graph: snap graph handed to snap.PlotShortPathDistr.
        hashtag: prefix of the output name
            ``<hashtag>_shortestPathLengthsDist``.
    """
    start = time.time()
    print("Calculating shortest path lengths distribution...")
    snap.PlotShortPathDistr(graph,
                            hashtag + "_shortestPathLengthsDist",
                            "Shortest Path Lengths Distribution")
    end = time.time()
    print("Completed in: %s" % timedelta(seconds=(int(end - start))))


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Must specify hashtag")
        sys.exit(1)
    hashtag = sys.argv[1]

    # Import the hashtag subgraph to work on
    FIn = snap.TFIn("../../data/mmr_subgraph_" + hashtag + ".bin")
    hashtag_subgraph = TUNGraph.Load(FIn)

    # Get max connected component
    hashtag_subgraph = snap.GetMxScc(hashtag_subgraph)

    # Start computation
    calculate_shortest_path_lengths_distribution(hashtag_subgraph, hashtag)
p.GetVal2(), p.GetVal1()) num_cc += p.GetVal2() print num_cc, "total strongly connected components" print snap.GetWccSzCnt(repliesgraph, CntV) num_cc = 0 for p in CntV: print "{0} weakly connected component(s) of size {1}".format( p.GetVal2(), p.GetVal1()) num_cc += p.GetVal2() print num_cc, "total weakly connected components" print #properties of largest strongly connected component big_scc = snap.GetMxScc(repliesgraph) snap.PrintInfo(big_scc, "Largest strongly connected component") num_dir_edges = snap.CntUniqDirEdges(big_scc) print "{0:.2f}% of directed edges are reciprocal".format( snap.CntUniqBiDirEdges(big_scc) * 2 * 100 / num_dir_edges) print "The clustering coefficient is {0:.2f}%".format( snap.GetClustCf(big_scc) * 100) print "The diameter is approximately {0}".format( snap.GetBfsFullDiam(big_scc, 1000)) #store CC for graphviz snap.SaveGViz(big_scc, filename + ".dot", "Largest Connected Component") print "Saved GraphViz"
plot_filedir = os.path.join(plotpath, plot_filename) plt.figure() plt.scatter(list(shortest_path_dist.keys()), list(shortest_path_dist.values()), s=10) plt.xlabel("Shortest Path Length") plt.ylabel("Frequency") plt.title("Shortest Path Distribution ({})".format(graph_filename[:-6])) plt.savefig(plot_filedir) """ FOR FASTER COMPUTATION, UNCOMMENT THE FOLLOWING LINE AND COMMENT OUT LINE 107-125 """ # snap.PlotShortPathDistr(G, "shortest_path_{}".format(graph_filename[:-6]), "Shortest Path Distribution ({})".format(graph_filename[:-6])) # [4] Components of the network SCC = snap.GetMxScc(G) print("Fraction of nodes in largest connected component: {}".format( round(SCC.GetNodes() / G.GetNodes(), 4))) Edge_Bridge = snap.TIntPrV() snap.GetEdgeBridges(G, Edge_Bridge) print("Number of edge bridges: {}".format(len(Edge_Bridge))) ArticulationPoint = snap.TIntV() snap.GetArtPoints(G, ArticulationPoint) print("Number of articulation points: {}".format(len(ArticulationPoint))) CComp = snap.TIntPrV() snap.GetSccSzCnt(G, CComp) connected_component = {} for comp in CComp:
def main():
    """PageRank and bow-tie exploration of the soc-Epinions1 graph.

    Steps: compute PageRank with PageRank() (defined elsewhere), print the
    ten highest ranks and the in-neighbours of those nodes, then derive the
    bow-tie regions from the largest SCC with forward/backward BFS, and
    finally plot the result of Random() (defined elsewhere).
    """
    # Loading the graph
    epinions = snap.LoadEdgeList(snap.PNGraph, "soc-Epinions1.txt", 0, 1)
    pr = PageRank(epinions, 0.8, 0.001)  # calling page rank function

    # Largest strongly connected component and its node ids.
    scc = snap.GetMxScc(epinions)
    sccNodes = [node.GetId() for node in scc.Nodes()]
    sccNodeSet = set(sccNodes)  # O(1) membership tests

    # All node ids; position i is taken to correspond to pr[i], as before.
    nodeList = [node.GetId() for node in epinions.Nodes()]
    indexOfNode = {nid: idx for idx, nid in enumerate(nodeList)}

    # Computing top rank nodes.  Each rank stays paired with its node id
    # while sorting; sorting the two lists separately broke the pairing.
    ranked = sorted(
        ((element[0], nodeList[index]) for index, element in enumerate(pr)),
        key=lambda pair: pair[0],
        reverse=True,
    )
    topRankNodes = [rank for rank, _ in ranked[:10]]
    topIds = [nid for _, nid in ranked[:10]]
    print("Top Rank Nodes:  {}".format(topRankNodes))

    # For every top node, list each in-neighbour with its rank entry.
    # NOTE(review): the labels look swapped (the first value is the
    # in-neighbour id, the second the in-degree); text kept unchanged.
    for topId in topIds:
        currentNode = epinions.GetNI(topId)
        x = currentNode.GetInDeg()
        for i in range(x):
            innerNode = currentNode.GetInNId(i)
            ele = pr[indexOfNode[innerNode]]
            print("In Degree:  {} w.r.t. node:  {} Rank:  {}".format(
                innerNode, x, ele))

    print("Number of nodes in SCC:  {}".format(scc.GetNodes()))

    # Forward BFS from an SCC node gives SCC + OUT.
    BfsOutSet = snap.GetBfsTree(epinions, sccNodes[0], True, False)
    bfsOutNodes = [node.GetId() for node in BfsOutSet.Nodes()
                   if node.GetId() not in sccNodeSet]
    # Remove the SCC by id instead of deleting while iterating the graph.
    for nid in sccNodes:
        if BfsOutSet.IsNode(nid):
            BfsOutSet.DelNode(nid)
    print("Number of OutSet Nodes:  {}".format(BfsOutSet.GetNodes()))

    # Backward BFS inside the OUT set; an empty OUT set gives no tendrils.
    outTendrils = []
    if bfsOutNodes:
        outSetTen = snap.GetBfsTree(BfsOutSet, bfsOutNodes[0], False, True)
        outTendrils = [node.GetId() for node in outSetTen.Nodes()]
    print("Tendrils in OutSet:  {}".format(len(outTendrils)))

    # Backward BFS from an SCC node gives SCC + IN.
    BfsInSet = snap.GetBfsTree(epinions, sccNodes[0], False, True)
    bfsInNodes = [node.GetId() for node in BfsInSet.Nodes()
                  if node.GetId() not in sccNodeSet]
    for nid in sccNodes:
        if BfsInSet.IsNode(nid):
            BfsInSet.DelNode(nid)
    print("Number of InSet Nodes:  {} clone: {}".format(
        BfsInSet.GetNodes(), len(bfsInNodes)))

    inTendrils = []
    if bfsInNodes:
        inSetTen = snap.GetBfsTree(BfsInSet, bfsInNodes[0], False, True)
        inTendrils = [node.GetId() for node in inSetTen.Nodes()]
    print("Tendrils in InSet:  {}".format(len(inTendrils)))

    # Tubes: ids present in both tendril sets (compared by id, not by
    # iterator object).
    outTendrilSet = set(outTendrils)
    tubeNodes = [nid for nid in inTendrils if nid in outTendrilSet]
    print("Tubes in SCC:  {}".format(len(tubeNodes)))

    # Disconnected region: nodes not placed in any of the sets above.
    classified = (sccNodeSet | set(bfsOutNodes) | set(bfsInNodes)
                  | set(inTendrils) | outTendrilSet)
    disComp = [nid for nid in nodeList if nid not in classified]
    print("Number of Disconnected Components:  {}".format(len(disComp)))

    # Random() is called once; the earlier extra call discarded its result.
    probabilities, nodes = Random(epinions, 5)
    plt.plot(nodes, probabilities)
    plt.xlabel('No of Nodes')
    plt.ylabel('Probability that path exists')
    plt.show()
plt.subplot(222)
plt.semilogy(X, Y, drawstyle='steps')
plt.xlabel('Frac. of Starting Nodes')
plt.ylabel('number of nodes reached (log)')
plt.title('Epinions: Reachability using Out-links')
plt.grid(True)
plt.show()

###################################################################
#2.3
###################################################################
# Calculate the values for email graph
total_nodes = G1.GetNodes()
largest_scc = snap.GetMxScc(G1)
SCC = largest_scc.GetNodes()
random_nid_in_scc = largest_scc.GetRndNId()

## Find the out- and in-components
outcomp = snap.GetBfsTree(G1, random_nid_in_scc, True, False)
incomp = snap.GetBfsTree(G1, random_nid_in_scc, False, True)
sz_outcomp = outcomp.GetNodes()
sz_incomp = incomp.GetNodes()

G_WCC = snap.GetMxWcc(G1)
WCC = G_WCC.GetNodes()
disconnected = total_nodes - WCC

# ASCII '-' here: the Unicode minus sign previously used is not a valid
# Python operator and made the file unparsable.
IN = sz_incomp - SCC
OUT = sz_outcomp - SCC
import snap
import sys
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Compute the closeness centrality of every node in the largest connected
# component of the undirected edge list given as the first argument.
input_file = sys.argv[1]
SubGraph = snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1)
cc = set()
closeness = dict()
Graph = snap.GetMxScc(SubGraph)
for node in Graph.Nodes():
    node_id = node.GetId()
    print(node_id)  # progress output; works on Python 2 and 3
    closeness[node_id] = snap.GetClosenessCentr(Graph, node_id)
# Plotting the distribution of shortest Length snap.PlotShortPathDistr(p2p_gnutella04_subgraph, "p2p-Gnutella04-subgraph", "Undirected graph - shortest path") print "Shortest path distribution of p2p-Gnutella04-subgraph is in :" + "diam.p2p-Gnutella04-subgraph.png" ## Task 1.2.4 # Task 1.2.4.1 if (sub_graph_name == "soc-Epinions1-subgraph"): # Finding the components of the network # Calculating the fraction of largest connected component largest_connected = snap.GetMxScc(soc_epinions1_subgraph) node = 0 for i in largest_connected.Nodes(): node = node + 1 print "Fraction of nodes in largest connected component in soc-Epinions1-subgraph :" + str( round(node * 1.0 / len(v1), 3)) if (sub_graph_name == "cit-HepPh-subgraph"): # Finding the components of the network # Calculating the fraction of largest connected component largest_connected = snap.GetMxScc(cit_heph_subgraph)