def gen_D(Pi, V_exo, theta2):
    """
    Returns a triplet of snap graphs:
      D        = opportunity graph with robust links removed.
      Pi_minus = subgraph of Pi without robustly absent potential links.
      Pi_exo   = subgraph of Pi with only robust links.

    NB: This function is specific to the joint surplus used in our simulations.

    Pi     = opportunity graph (in our case, the output of gen_RGG).
    V_exo  = 'exogenous' part of joint surplus (output of gen_V_exo).
    theta2 = transitivity parameter (theta[2]).
    """
    N = V_exo.shape[0]
    D = snap.ConvertGraph(snap.PUNGraph, Pi)
    Pi_minus = snap.ConvertGraph(snap.PUNGraph, Pi)
    Pi_exo = snap.GenRndGnm(snap.PUNGraph, N, 0)

    for edge in Pi.Edges():
        i = min(edge.GetSrcNId(), edge.GetDstNId())
        j = max(edge.GetSrcNId(), edge.GetDstNId())
        # robust link: present regardless of the endogenous part
        if V_exo[i, j] + min(theta2, 0) > 0:
            D.DelEdge(i, j)
            Pi_exo.AddEdge(i, j)
        # robustly absent link: absent regardless of the endogenous part
        if V_exo[i, j] + max(theta2, 0) <= 0:
            D.DelEdge(i, j)
            Pi_minus.DelEdge(i, j)

    return (D, Pi_minus, Pi_exo)
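# A minimal, hypothetical usage sketch of gen_D. The real inputs would come from
# gen_RGG and gen_V_exo (not shown here); a small random opportunity graph and a
# random symmetric matrix are substituted below so the call is runnable.
import snap
import numpy as np

N_demo = 50
Pi_demo = snap.GenRndGnm(snap.PUNGraph, N_demo, 200)   # stand-in for gen_RGG output
V_demo = np.random.normal(size=(N_demo, N_demo))
V_demo = (V_demo + V_demo.T) / 2                       # stand-in for gen_V_exo output
D_demo, Pi_minus_demo, Pi_exo_demo = gen_D(Pi_demo, V_demo, 0.5)
print(D_demo.GetEdges(), Pi_minus_demo.GetEdges(), Pi_exo_demo.GetEdges())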
def tuntoall():
    FIn = snap.TFIn(NW.twitter_binary)
    G = snap.TUNGraph.Load(FIn)
    t0 = t()

    # convert undirected graph to directed
    GOut = snap.ConvertGraph(snap.PNGraph, G)
    t1 = reportTime(t0, "convert TUNGRAPH to TNGRAPH")

    # convert undirected graph to a network
    GOut = snap.ConvertGraph(snap.PNEANet, G)
    reportTime(t1, "convert TUNGRAPH to TNEANet")
def get_connected_component(graph):
    if isinstance(graph, snap.PNGraph):
        lcc = snap.GetMxScc(graph)
        # renumber the node ids from 0 to size-1
        lcc = snap.ConvertGraph(snap.PNGraph, lcc, True)
    elif isinstance(graph, snap.PUNGraph):
        lcc = snap.GetMxWcc(graph)
        # renumber the node ids from 0 to size-1
        lcc = snap.ConvertGraph(snap.PUNGraph, lcc, True)
    else:
        raise NotAGraphError(graph)
    return lcc
def main():
    import json
    import snap
    import graphviz
    import matplotlib.pyplot as plt
    import numpy as np
    import xlrd

    # -----------------
    # The common area
    rumor_number = "21"
    path_input = 'D:\\Papers\\Social Network Mining\\Analysis_of_Rumor_Dataset\\Step 18\\Rumor_' + rumor_number + '\\Input\\'
    workbook_input1_D = xlrd.open_workbook(path_input + 'DATASET.xlsx', on_demand=True)
    path_jsonl = 'D:\\Papers\\Social Network Mining\\Analysis_of_Rumor_Dataset\\Step 18\\Rumor_' + rumor_number + '\\Input\\Rumor_' + rumor_number + '.jsonl'
    path_graph = 'D:\\Papers\\Social Network Mining\\Analysis_of_Rumor_Dataset\\Step 18\\Rumor_' + rumor_number + '\\Input\\Rumor_' + rumor_number + '.graph'
    path_output = 'D:\\Papers\\Social Network Mining\\Analysis_of_Rumor_Dataset\\Step 18\\Rumor_' + rumor_number + '\\Output\\'

    FIn = snap.TFIn(path_graph)
    G_Directed = snap.TNGraph.Load(FIn)
    # Convert the directed graph to a directed network (PNEANet) so that
    # attributes can be attached to its nodes.
    G_Directed_with_Attributes = snap.ConvertGraph(snap.PNEANet, G_Directed)
    G_Directed_with_Attributes = Get_Graph_with_Attributes_New(path_jsonl, G_Directed_with_Attributes, workbook_input1_D)

    # -----------------
    # The specific area
    snap.PrintInfo(G_Directed_with_Attributes, "Python type PNEANet", path_output + "S18_5_Output.txt", False)
def set_degree_proportional_thresholds(graph, value):
    g = snap.ConvertGraph(snap.PNEANet, graph)
    print("Number of graph nodes: ", g.GetNodes())
    for n in g.Nodes():
        g.AddIntAttrDatN(n.GetId(), math.floor(n.GetDeg() * value) + 1, "threshold")
    return g
def set_random_threshold(graph):
    g = snap.ConvertGraph(snap.PNEANet, graph)
    for n in g.Nodes():
        # upper bound: the node degree plus roughly 20% headroom
        upper = n.GetDeg() + int((n.GetDeg() / 100) * 20 + 1)
        random_value = random.randint(0, upper)
        g.AddIntAttrDatN(n.GetId(), random_value, "threshold")
        # print("Threshold of the node ", n.GetId(), " with value", g.GetIntAttrDatN(n.GetId(), "threshold"))
    return g
def transform_directed_to_undirected():
    GUn = snap.ConvertGraph(snap.PUNGraph, G)
    snap.PrintInfo(GUn, "Tweets UN stats", "Tweets_UN_info.txt", False)
    f = open('Tweets_UN_info.txt', 'r')
    file_contents = f.read()
    # print(file_contents)
    f.close()
    return GUn
def proportional_to_the_degree_threshold_assignment(g):
    g = snap.ConvertGraph(snap.PNEANet, g)
    for n in g.Nodes():
        deg = n.GetDeg()
        value = 5
        if deg > 0:
            value = int((1 / (deg + value)) * (g.GetEdges() / g.GetNodes()))
        g.AddIntAttrDatN(n.GetId(), value, "threshold")
    return g
def join_subgraphs_EB(subgraph1, subgraph2, nmE, nmN):
    c = snap.ConvertGraph(type(subgraph2), subgraph2)
    if nmN:
        c.AddNode(nmN)
    c.AddEdge(nmE[0], nmE[1])
    return c
def snowball_sample(G, num_waves, seeds):
    """
    Parameters:
       G         - SNAP graph or network to sample from
       num_waves - number of snowball waves
       seeds     - SNAP vector (TIntV) of seeds (node ids) to start the
                   snowball sample from

    Return value:
       SNAP network (TNEANet) snowball sampled from G, with each node having
       an integer "zone" attribute for its snowball sampling zone
       (0 = seed, 1 = first wave, etc.).
       [TNEANet is needed to allow the zone attribute; the multigraph
       capability is not actually used.]

    Note: edge directions on a directed graph are ignored, so sampling works
    the same way on undirected and directed graphs.
    """
    assert len(seeds) == len(set(seeds))  # no duplicate node ids
    # GetSubGraph does not seem to preserve node attributes, so instead of
    # adding attributes to nodes of N, build a Python dictionary mapping node
    # ids to zones and then add them back as attributes on the subgraph
    # (node ids are preserved, so we can do this).
    zonedict = dict()  # map nodeid : zone
    N = snap.ConvertGraph(snap.PNEANet, G)  # copy graph/network G to network N
    nodes = set(seeds)  # will accumulate all nodes (including seeds) here
    for seed in seeds:
        zonedict[seed] = 0  # seed nodes are zone 0
    newNodes = set(nodes)
    for i in range(num_waves):
        wave = i + 1
        # print('wave', wave)
        for node in set(newNodes):
            neighbours = snap.TIntV()
            snap.GetNodesAtHop(G, node, 1, neighbours, False)  # neighbours of node
            newNeighbours = set(neighbours) - nodes  # neighbours not already in nodes
            for node in newNeighbours:
                if node not in zonedict:
                    zonedict[node] = wave
            newNodes.update(newNeighbours)  # newNodes gets set union of itself and newNeighbours
        nodes.update(newNodes)
    # have to convert the nodes set into a TIntV for use in SNAP
    NodeVec = snap.TIntV()
    for node in nodes:
        NodeVec.Add(node)
    sampleN = snap.GetSubGraph(N, NodeVec)
    # now put the zones as attributes on the subgraph nodes (which depends
    # on node ids being preserved in the subgraph)
    sampleN.AddIntAttrN("zone", -1)  # add zone attribute, initialised to -1
    for (nodeid, zone) in zonedict.items():
        sampleN.AddIntAttrDatN(nodeid, zone, "zone")
    return sampleN
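# Illustrative call of snowball_sample (assumed setup): two waves from a small
# Erdos-Renyi graph, with node ids 0 and 1 as seeds.
G_demo = snap.GenRndGnm(snap.PUNGraph, 100, 400)
seeds_demo = snap.TIntV()
seeds_demo.Add(0)
seeds_demo.Add(1)
sample_demo = snowball_sample(G_demo, 2, seeds_demo)
print("sampled nodes:", sample_demo.GetNodes())
print("zone of seed 0:", sample_demo.GetIntAttrDatN(0, "zone"))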
def getEdgeBridges(network):
    UGraph = snap.ConvertGraph(snap.PUNGraph, network)
    EdgeV = snap.TIntPrV()
    snap.GetEdgeBridges(UGraph, EdgeV)
    for edge in EdgeV:
        print("edge: (%d, %d)" % (edge.GetVal1(), edge.GetVal2()))
    print(len(EdgeV))
    return EdgeV
def estimate4SubgraphFrequencies(Network, connected=True):
    subgraph_counts = np.zeros(10)
    # 0 -> 0 edges
    # 1 -> 1 edge
    # 2 -> 2 adjacent edges
    # 3 -> 2 non-adjacent edges
    # 4 -> 3-star
    # 5 -> 3-path
    # 6 -> tailed triangle
    # 7 -> 4-cycle
    # 8 -> chordal 4-cycle
    # 9 -> 4-clique
    G = snap.ConvertGraph(snap.PUNGraph, Network)
    for _ in range(num_samples):
        sG = snap.GetRndSubGraph(G, 4)
        num_edges = sG.GetEdges()
        if connected and num_edges < 3:
            continue
        if num_edges == 0:
            subgraph_counts[0] += 1
        elif num_edges == 1:
            subgraph_counts[1] += 1
        elif num_edges == 2:
            maxdeg = sG.GetNI(snap.GetMxDegNId(sG)).GetDeg()
            if maxdeg == 2:
                subgraph_counts[2] += 1
            else:
                subgraph_counts[3] += 1
        elif num_edges == 3:
            maxdeg = sG.GetNI(snap.GetMxDegNId(sG)).GetDeg()
            if maxdeg == 3:
                subgraph_counts[4] += 1
            else:
                subgraph_counts[5] += 1
        elif num_edges == 4:
            maxdeg = sG.GetNI(snap.GetMxDegNId(sG)).GetDeg()
            if maxdeg == 3:
                subgraph_counts[6] += 1
            else:
                subgraph_counts[7] += 1
        elif num_edges == 5:
            subgraph_counts[8] += 1
        else:
            subgraph_counts[9] += 1
    return list(subgraph_counts / sum(subgraph_counts))
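# Hypothetical driver for estimate4SubgraphFrequencies: num_samples is assumed
# to be a module-level constant used by the sampler above, and the dense random
# graph below is only a stand-in for a real input network.
num_samples = 2000
Net_demo = snap.GenRndGnm(snap.PUNGraph, 50, 500)
print(estimate4SubgraphFrequencies(Net_demo, connected=True))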
def community_detection(input, output):
    print("Loading graph...")
    FIn = snap.TFIn(input)
    graph = snap.TNGraph.Load(FIn)
    ugraph = snap.ConvertGraph(snap.PUNGraph, graph)
    print("Performing community detection...")
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityCNM(ugraph, CmtyV)
    print("Modularity:", modularity)
    with open(output, "w") as file:
        for Cmty in CmtyV:
            file.write(repr([NI for NI in Cmty]))
            file.write("\n")
def deferred_decision(G, probs, dist):
    graph = snap.ConvertGraph(snap.PUNGraph, G)
    for e in graph.Edges():
        if dist == 'uniform':
            x = np.random.uniform()
        else:
            x = np.random.normal()
        src = e.GetSrcNId()
        dst = e.GetDstNId()
        if x < probs[(src, dst)]:
            graph.DelEdge(src, dst)
    return graph
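# Hypothetical usage of deferred_decision: drop each edge of a small undirected
# graph independently with probability 0.3. The probs keys follow the
# (src, dst) order reported by the edge iterator.
demo_G = snap.GenRndGnm(snap.PUNGraph, 100, 300)
demo_probs = {(e.GetSrcNId(), e.GetDstNId()): 0.3 for e in demo_G.Edges()}
kept = deferred_decision(demo_G, demo_probs, 'uniform')
print("edges before:", demo_G.GetEdges(), "after:", kept.GetEdges())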
def set_median_threshold(graph):
    g = snap.ConvertGraph(snap.PNEANet, graph)
    data = []
    print("Number of graph nodes: ", g.GetNodes())
    count = 0
    for n in g.Nodes():
        data.append(n.GetDeg())
    value = median(data)
    print("The median value is: ", value)
    for n in g.Nodes():
        g.AddIntAttrDatN(n.GetId(), value, "threshold")
        print("Threshold of the node ", n.GetId(), " with value ", g.GetIntAttrDatN(n.GetId(), "threshold"))
        if n.GetDeg() < value:
            count += 1
    print("Number of nodes below the median: ", count)
    return g
def visualiseGraph(rowData, activityCodeList, fileName, title, undirect_conversion=False):
    columnList = generateTransition(activityCodeList)
    G1 = snap.TNGraph.New()
    checkActivityList = []
    for i in columnList:
        if i[1] in rowData.index:
            if rowData[i[1]] > 0:
                if i[0][0] not in checkActivityList:
                    G1.AddNode(i[0][0])
                    checkActivityList.append(i[0][0])
                if i[0][1] not in checkActivityList:
                    G1.AddNode(i[0][1])
                    checkActivityList.append(i[0][1])
                G1.AddEdge(i[0][0], i[0][1])
    if undirect_conversion:
        G1 = snap.ConvertGraph(snap.PUNGraph, G1)
    snap.DrawGViz(G1, snap.gvlDot, "graphs/" + "/" + fileName + ".png", title, True)
def estimate3SubgraphFrequencies(Network):
    G = snap.ConvertGraph(snap.PNGraph, Network)
    subgraph_counts = np.zeros(7)
    # 0 -> 0 edges
    # 1 -> 1 edge
    # 2 -> 2 edges to same node
    # 3 -> 2 edges from same node
    # 4 -> 2 edges through one node
    # 5 -> 3 edge cycle
    # 6 -> 3 edges, not a cycle
    for _ in range(num_samples):
        sG = snap.GetRndSubGraph(G, 3)
        num_edges = sG.GetEdges()
        if num_edges == 0:
            subgraph_counts[0] += 1
        elif num_edges == 1:
            subgraph_counts[1] += 1
        elif num_edges == 2:
            max_indeg = sG.GetNI(snap.GetMxInDegNId(sG)).GetInDeg()
            max_outdeg = sG.GetNI(snap.GetMxOutDegNId(sG)).GetOutDeg()
            if max_indeg == 2:
                subgraph_counts[2] += 1
            elif max_outdeg == 2:
                subgraph_counts[3] += 1
            else:
                subgraph_counts[4] += 1
        else:
            max_indeg = sG.GetNI(snap.GetMxInDegNId(sG)).GetInDeg()
            if max_indeg == 1:
                subgraph_counts[5] += 1
            else:
                subgraph_counts[6] += 1
    return list(subgraph_counts / sum(subgraph_counts))
def girvin_neuman_profile_extract(rowData, activityCodeList, index, week):
    columnList = generateTransition(activityCodeList)
    G1 = snap.TNGraph.New()
    checkActivityList = []
    # for node1 in activityCodeList:
    #     for node2 in activityCodeList:
    #         a = node1[1] + '-' + node2[1]
    #         if a in rowData.index:
    #             if node1[0] not in checkActivityList:
    #                 G1.AddNode(node1[0])
    #                 checkActivityList.append(node1[0])
    #             if node2[0] not in checkActivityList:
    #                 G1.AddNode(node2[0])
    #                 checkActivityList.append(node2[0])
    for i in columnList:
        if i[1] in rowData.index:
            if rowData[i[1]] > 0:
                if i[0][0] not in checkActivityList:
                    G1.AddNode(i[0][0])
                    checkActivityList.append(i[0][0])
                if i[0][1] not in checkActivityList:
                    G1.AddNode(i[0][1])
                    checkActivityList.append(i[0][1])
                G1.AddEdge(i[0][0], i[0][1])
    G1_undirect = snap.ConvertGraph(snap.PUNGraph, G1)
    # snap.DrawGViz(G1_undirect, snap.gvlDot, "graphs/week/" + str(week) + "/" + index + ".png", index)
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(G1_undirect, CmtyV)
    noOfCluster = len(CmtyV)
    clusterList = []
    for Cmty in CmtyV:
        community = []
        for NI in Cmty:
            community.append(NI)
        clusterList.append(community)
    return [index, modularity, noOfCluster, clusterList]
# def getCentralities(network):
network = loadGraph()
nameToNId = {}
uIdToNId = {}
for n in network.Nodes():
    id = n.GetId()
    nameToNId[network.GetStrAttrDatN(id, 'name')] = id

infile = codecs.open('csv/dblpusersaff.csv', 'r', 'utf-8')
lines = infile.read().splitlines()
infile.close()
for line in lines:
    tokens = line.split('||')
    if tokens[2] != '':
        nId = nameToNId[tokens[1]]
        uIdToNId[int(tokens[0])] = nId

graph = snap.ConvertGraph(snap.PUNGraph, network)
degCenters = {}
closeCenters = {}
pageRanks = snap.TIntFltH()
eigenCenters = snap.TIntFltH()
# btwnCenters = snap.TIntFltH()
# edgeHash = snap.TIntPrFltH()

print('Running PageRank...')
snap.GetPageRank(graph, pageRanks)
print('Running Eigenvector centrality...')
snap.GetEigenVectorCentr(graph, eigenCenters)
# print('Running Betweenness...')
# snap.GetBetweennessCentr(graph, btwnCenters, edgeHash)
print('Running Degree and Closeness...')
for uId, nId in uIdToNId.items():
    print(uId, nId)
def set_fixed_threshold(graph, value):
    g = snap.ConvertGraph(snap.PNEANet, graph)
    for n in g.Nodes():
        g.AddIntAttrDatN(n.GetId(), value, "threshold")
    return g
for x in parsed:
    vid = x.videoid
    for v in list(x.related) + [vid]:
        if v not in self.nodeid:
            self.nodeid[v] = self.size
            self.videoid[self.size] = v
            self.size += 1

# filenames = ["0301/{}.txt".format(i) for i in range(0, 4)]
# data = Data(filenames)
# graph = make_graph(data)
# save_graph_data(data, graph, "try")

data, graph = load_graph_data("try")
Graph = snap.ConvertGraph(snap.PUNGraph, graph)
NId1 = snap.GetMxDegNId(Graph)
NIdToDistH = snap.TIntH()
shortestPath = snap.GetShortPath(Graph, NId1, NIdToDistH)
shortestDist = {}
for item in NIdToDistH:
    shortestDist[item] = NIdToDistH[item]

PRankH = snap.TIntFltH()
snap.GetPageRank(Graph, PRankH)
simRanks = {}

def simRank(Graph, nIters, gamma):
def algorithm(G, D):
    # Pruning Step
    P = 1
    T = 0
    while P == 1:
        P = 0
        for NI in G.Nodes():
            NID = NI.GetId()
            d = NI.GetDeg()
            if d <= D or d > G.GetNodes() - 2:
                if d <= D and d > 1:
                    for i in range(d - 1):
                        for j in range(i + 1, d):
                            a = NI.GetNbrNId(i)
                            b = NI.GetNbrNId(j)
                            if G.IsEdge(a, b):
                                T = T + 1
                if d > D and d > G.GetNodes() - 2:
                    T = T + G.GetEdges() - NI.GetDeg()
                P = 1
                G.DelNode(NID)

    # Hierarchical Clustering Step
    if G.GetNodes() > 5:
        H = snap.ConvertGraph(type(G), G)
        S = []
        i = 0
        while H.GetNodes() > 0:
            S.append([])
            S[i].append(snap.GetMxDegNId(H))
            j = 1
            TTT = True
            while TTT:
                s = snap.TIntV()
                snap.GetNodesAtHop(H, S[i][0], j, s, True)
                if len(s) != 0:
                    S[i].append(s)
                    j = j + 1
                else:
                    TTT = False
            H.DelNode(S[i][0])
            for j in range(1, len(S[i])):
                for nodeID in S[i][j]:
                    H.DelNode(nodeID)
            i = i + 1

        subgraphs = [[] for x in range(len(S))]

        # Counting Step
        for i in range(len(S)):
            for j in range(1, len(S[i])):
                G01 = snap.ConvertSubGraph(snap.PUNGraph, G, S[i][j])
                subgraphs[i].append(G01)
            T = T + subgraphs[i][0].GetEdges()
            G.DelNode(S[i][0])
        for i in range(len(S)):
            for j in range(1, len(S[i])):
                for upnodeID in S[i][j]:
                    # Dn renamed from D so it does not shadow the degree-bound
                    # parameter used in the recursive call below
                    U = []
                    Dn = []
                    for t in range(G.GetNI(upnodeID).GetDeg()):
                        a = G.GetNI(upnodeID).GetNbrNId(t)
                        if j < len(S[i]) - 1:
                            if subgraphs[i][j].IsNode(a):
                                U.append(a)
                        if j > 1:
                            if subgraphs[i][j - 2].IsNode(a):
                                Dn.append(a)
                    for s in range(len(U)):
                        for t in range(s + 1, len(U)):
                            if subgraphs[i][j].IsEdge(U[s], U[t]):
                                T = T + 1
                    for s in range(len(Dn)):
                        for t in range(s + 1, len(Dn)):
                            if subgraphs[i][j - 2].IsEdge(Dn[s], Dn[t]):
                                T = T + 1
        for i in range(len(S)):
            for j in range(len(S[i]) - 1):
                T = T + algorithm(subgraphs[i][j], D)
    return T
import snap
import sys

# Simple script to re-index to a 0-indexed graph.
graph = sys.argv[1]
if len(sys.argv) > 2 and sys.argv[2] == '1':
    Gin = snap.LoadEdgeList(snap.PUNGraph, graph)
else:
    Gin = snap.LoadEdgeList(snap.PNGraph, graph)
MxScc = snap.GetMxScc(Gin)
Gout = snap.ConvertGraph(snap.PNGraph, MxScc, True)
print('Number of nodes: ', Gout.GetNodes())
print('Number of edges: ', Gout.GetEdges())
snap.SaveEdgeList(Gout, graph)
def convert_to_undirected(in_Graph):
    return snap.ConvertGraph(snap.PUNGraph, in_Graph)
import snap
import sys

# Simple script to re-index to a 0-indexed graph.
graph = sys.argv[1]
if len(sys.argv) > 2 and sys.argv[2] == '2':
    Gin = snap.LoadEdgeList(snap.PUNGraph, graph)
else:
    Gin = snap.LoadEdgeList(snap.PNGraph, graph)
Gout = snap.ConvertGraph(snap.PNGraph, Gin, True)
print('Number of nodes: ', Gout.GetNodes())
print('Number of edges: ', Gout.GetEdges())
snap.SaveEdgeList(Gout, graph)
def convert_undirected(G1):
    G2 = snap.ConvertGraph(snap.PUNGraph, G1)
    return G2
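# Quick illustrative check: converting a directed Gnm graph to an undirected
# one may merge reciprocal edge pairs, so the edge count can drop.
G_dir = snap.GenRndGnm(snap.PNGraph, 1000, 5000)
G_und = convert_undirected(G_dir)
print(G_dir.GetEdges(), G_und.GetEdges())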
G2.Save(FOut)
FOut.Flush()
FIn = snap.TFIn("test.graph")
G4 = snap.TNGraph.Load(FIn)
print("G4: Nodes %d, Edges %d" % (G4.GetNodes(), G4.GetEdges()))

# save and load from a text file
snap.SaveEdgeList(G4, "test.txt", "Save as tab-separated list of edges")
G5 = snap.LoadEdgeList(snap.PNGraph, "test.txt", 0, 1)
print("G5: Nodes %d, Edges %d" % (G5.GetNodes(), G5.GetEdges()))

# create a directed random graph on 10k nodes and 5k edges
G6 = snap.GenRndGnm(snap.PNGraph, 10000, 5000)
print("G6: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))

# convert to undirected graph
G7 = snap.ConvertGraph(snap.PUNGraph, G6)
print("G7: Nodes %d, Edges %d" % (G7.GetNodes(), G7.GetEdges()))

# get largest weakly connected component
WccG = snap.GetMxWcc(G6)

# generate a network using Forest Fire model
G8 = snap.GenForestFire(1000, 0.35, 0.35)
print("G8: Nodes %d, Edges %d" % (G8.GetNodes(), G8.GetEdges()))

# get a subgraph induced on nodes {0, 1, 2, 3, 4}
SubG = snap.GetSubGraph(G8, snap.TIntV.GetV(0, 1, 2, 3, 4))

# get 3-core of G8
Core3 = snap.GetKCore(G8, 3)
print("Core3: Nodes %d, Edges %d" % (Core3.GetNodes(), Core3.GetEdges()))
import snap
import sys

'''
Simple script to get the maximal bi-connected component.
'''

graph = sys.argv[1]
Gin = snap.LoadEdgeList(snap.PNGraph, graph)
BiCon = snap.GetMxBiCon(Gin)
Gout = snap.ConvertGraph(snap.PNGraph, BiCon, True)
print('Number of nodes: ', Gout.GetNodes())
print('Number of edges: ', Gout.GetEdges())
out_graph = graph.split('.txt')[0] + '-bicon.txt'
snap.SaveEdgeList(Gout, out_graph)
def intro():
    # create a graph PNGraph
    G1 = snap.TNGraph.New()
    G1.AddNode(1)
    G1.AddNode(5)
    G1.AddNode(32)
    G1.AddEdge(1, 5)
    G1.AddEdge(5, 1)
    G1.AddEdge(5, 32)
    print("G1: Nodes %d, Edges %d" % (G1.GetNodes(), G1.GetEdges()))

    # create a directed random graph on 100 nodes and 1k edges
    G2 = snap.GenRndGnm(snap.PNGraph, 100, 1000)
    print("G2: Nodes %d, Edges %d" % (G2.GetNodes(), G2.GetEdges()))

    # traverse the nodes
    for NI in G2.Nodes():
        print("node id %d with out-degree %d and in-degree %d" % (NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()))
    # traverse the edges
    for EI in G2.Edges():
        print("edge (%d, %d)" % (EI.GetSrcNId(), EI.GetDstNId()))
    # traverse the edges by nodes
    for NI in G2.Nodes():
        for Id in NI.GetOutEdges():
            print("edge (%d %d)" % (NI.GetId(), Id))

    # generate a network using Forest Fire model
    G3 = snap.GenForestFire(1000, 0.35, 0.35)
    print("G3: Nodes %d, Edges %d" % (G3.GetNodes(), G3.GetEdges()))

    # save and load binary
    FOut = snap.TFOut("test.graph")
    G3.Save(FOut)
    FOut.Flush()
    FIn = snap.TFIn("test.graph")
    G4 = snap.TNGraph.Load(FIn)
    print("G4: Nodes %d, Edges %d" % (G4.GetNodes(), G4.GetEdges()))

    # save and load from a text file
    snap.SaveEdgeList(G4, "test.txt", "Save as tab-separated list of edges")
    G5 = snap.LoadEdgeList(snap.PNGraph, "test.txt", 0, 1)
    print("G5: Nodes %d, Edges %d" % (G5.GetNodes(), G5.GetEdges()))

    # generate a network using Forest Fire model
    G6 = snap.GenForestFire(1000, 0.35, 0.35)
    print("G6: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))

    # convert to undirected graph
    G7 = snap.ConvertGraph(snap.PUNGraph, G6)
    print("G7: Nodes %d, Edges %d" % (G7.GetNodes(), G7.GetEdges()))

    # get largest weakly connected component of G6
    WccG = snap.GetMxWcc(G6)
    # get a subgraph induced on nodes {0, 1, 2, 3, 4}
    SubG = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4))
    # get 3-core of G6
    Core3 = snap.GetKCore(G6, 3)

    # delete nodes of out-degree 10 and in-degree 5
    snap.DelDegKNodes(G6, 10, 5)
    print("G6a: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))

    # generate a Preferential Attachment graph on 1000 nodes with node out-degree 3
    G8 = snap.GenPrefAttach(1000, 3)
    print("G8: Nodes %d, Edges %d" % (G8.GetNodes(), G8.GetEdges()))

    # vector of pairs of integers (size, count)
    CntV = snap.TIntPrV()
    # get distribution of connected components (component size, count)
    snap.GetWccSzCnt(G8, CntV)
    # get degree distribution pairs (degree, count)
    snap.GetOutDegCnt(G8, CntV)

    # vector of floats
    EigV = snap.TFltV()
    # get first eigenvector of graph adjacency matrix
    snap.GetEigVec(G8, EigV)
    # get diameter of G8
    snap.GetBfsFullDiam(G8, 100)
    # count the number of triads in G8, get the clustering coefficient of G8
    snap.GetTriads(G8)
    snap.GetClustCf(G8)
def main():
    # Load data
    nodes = pd.read_csv("../data/nodes.csv", sep='\t', index_col=0)
    # Data in nice form
    headers = list(nodes.columns)
    nodes = np.asarray(nodes)

    # Load social network accordingly
    if path.exists("../data/youtube.graph"):
        FIn = snap.TFIn("../data/youtube.graph")
        social_network = snap.TNGraph.Load(FIn)
    else:
        edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
        edges = np.asarray(edges).astype(int)
        social_network = data2dag(edges, nodes.shape[0])

    # Check for self edges
    for e in social_network.Edges():
        if e.GetSrcNId() == e.GetDstNId():
            print("Self Loop Found:", e.GetSrcNId())

    # CNM Algorithm from snap.py
    print("Computing CNM")
    start = timeit.default_timer()
    CmtyV = snap.TCnComV()
    undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
    snap.DelSelfEdges(undirected)
    the_modularity = snap.CommunityCNM(undirected, CmtyV)
    stop = timeit.default_timer()

    node_to_cmty = np.zeros(nodes.shape[0])
    cmty_sizes = np.zeros(len(CmtyV))
    for i in range(len(CmtyV)):
        for node in CmtyV[i]:
            node_to_cmty[node] = i
        cmty_sizes[i] = len(CmtyV[i])
    cmtys = [[node for node in cmty] for cmty in CmtyV]

    '''
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)
    G = nx.Graph()
    G.add_nodes_from(range(nodes.shape[0]))
    G.add_edges_from(list(map(tuple, edges)))
    '''
    # assert(is_partition(G, cmtys))
    # print("Calculating Modularity")
    # modul = modularity(G, cmtys)

    print("Results from Clauset-Newman-Moore:")
    # print("Modularity:", modul)
    print("Number of clusters:", len(CmtyV))
    print("Time elapsed:", stop - start)

    # Fun category stuff to do
    upload_col = headers.index('category')
    categories = set()
    for i in range(nodes.shape[0]):
        categories.add(nodes[i][upload_col])
    idx_to_categories = list(categories)
    print("Number of categories:", len(idx_to_categories))
    categories_to_idx = dict()
    for i in range(len(idx_to_categories)):
        categories_to_idx[idx_to_categories[i]] = i

    # Communities and categories
    cmty_category_count = np.zeros((len(CmtyV), len(idx_to_categories)))
    for i in range(nodes.shape[0]):
        cmty_category_count[int(node_to_cmty[i]), categories_to_idx[nodes[i][upload_col]]] += 1
    cmty_category_count = cmty_category_count / cmty_sizes[:, np.newaxis]

    # Create graphs per category
    plt.figure()
    plt.plot(sorted(np.max(cmty_category_count, axis=1), reverse=True), label="Top proportion")
    plt.plot(0.5 * np.ones(cmty_category_count.shape[0]), label="Majority Threshold", linestyle='dashed')
    plt.title("Category Proportions in Clusters")
    plt.xlabel("Cluster")
    plt.ylabel("Proportion")
    plt.legend()
    plt.savefig("../figures/category_top_clusters.png")

    '''
    for i in range(cmty_category_count.shape[0]):
        top_category = np.argmax(cmty_category_count[i])
        print("Community " + str(i) + ": " + str(idx_to_categories[top_category]) + ",", cmty_category_count[i][top_category])
    '''