def computeClusteringCoeff(G, NodeAttributes):
    """Annotate every node with its raw and min-max normalised clustering coefficient.

    :param G: snap graph passed to ``snap.GetNodeClustCf``.
    :param NodeAttributes: container indexed by node id; each entry must
        support item assignment. It is updated in place with the keys
        ``'ClusterCoeff'`` and ``'NormClusterCoeff'``.
    :return: the same ``NodeAttributes`` object.

    If the graph has no nodes, ``NodeAttributes`` is returned untouched. If
    every node has the same coefficient, the normalised value is 0.0 rather
    than a division by zero.
    """
    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(G, NIdCCfH)

    # Copy the coefficients out once; the table does not need recomputing.
    coeffByNode = {}
    for nodeId in NIdCCfH:
        coeffByNode[nodeId] = NIdCCfH[nodeId]

    if not coeffByNode:
        return NodeAttributes

    minClusterCoeff = min(coeffByNode.values())
    maxClusterCoeff = max(coeffByNode.values())
    coeffRange = maxClusterCoeff - minClusterCoeff

    for nodeId, clusterCoeff in coeffByNode.items():
        NodeAttributes[nodeId]['ClusterCoeff'] = clusterCoeff
        if coeffRange > 0:
            normClusterCoeff = (clusterCoeff - minClusterCoeff) / coeffRange
        else:
            # All coefficients are identical: there is no spread to scale.
            normClusterCoeff = 0.0
        NodeAttributes[nodeId]['NormClusterCoeff'] = normClusterCoeff

    return NodeAttributes
def get_clustering_coeff(filename):
    """Return the mean local clustering coefficient for each node degree.

    The edge list in ``filename`` is loaded as a ``snap.PUNGraph``.

    :param filename: path of the edge-list file.
    :return: ``collections.defaultdict(int)`` mapping out-degree to the
        arithmetic mean of the clustering coefficients of the nodes that
        have that degree.
    """
    graph = snap.LoadEdgeList(snap.PUNGraph, filename)

    totals = collections.defaultdict(float)
    counts = collections.defaultdict(int)
    for node in graph.Nodes():
        degree = node.GetOutDeg()
        totals[degree] += snap.GetNodeClustCf(graph, node.GetId())
        counts[degree] += 1

    # A true mean (sum / count); a running "(previous + new) / 2" would
    # weight later nodes more heavily and halve single-node buckets.
    avg_coeff = collections.defaultdict(int)
    for degree, total in totals.items():
        avg_coeff[degree] = total / counts[degree]
    return avg_coeff
def CalculateClusteringCoefficient(graph):
    """Print the local clustering coefficient of every node in ``graph``."""
    coefficients = snap.TIntFltH()
    snap.GetNodeClustCf(graph, coefficients)

    print("CLUSTERRING COEFFICIENT")
    line_template = "Node %d th have coefficient %f"
    for node_id in coefficients:
        print(line_template % (node_id, coefficients[node_id]))
def ccDist(G):
    """Return the average clustering coefficient per out-degree.

    The result is a ``defaultdict`` (default value 0) keyed by out-degree.
    """
    node_counts = {}
    coeff_sums = {}
    for node in G.Nodes():
        degree = node.GetOutDeg()
        seen = node_counts.get(degree, 0)
        running = coeff_sums.get(degree, 0)
        node_counts[degree] = seen + 1
        coeff_sums[degree] = running + snap.GetNodeClustCf(G, node.GetId())

    averages = collections.defaultdict(lambda: 0)
    for degree in node_counts:
        averages[degree] = coeff_sums[degree] * 1.0 / node_counts[degree]
    return averages
def calculate_clustering_coeff(G, nodes):
    """Print summary statistics of the clustering coefficients of ``nodes``.

    Ids in ``nodes`` for which ``G.IsNode`` is false are skipped. The printed
    line holds the mean, the standard deviation, the sample count and
    ``1.96 * std / sqrt(n)``.

    When no node qualifies, the statistics are reported as ``nan`` directly,
    without asking numpy to average an empty sample.

    :param G: snap graph.
    :param nodes: iterable of candidate node ids.
    """
    coeffs = [snap.GetNodeClustCf(G, node) for node in nodes if G.IsNode(node)]
    n = len(coeffs)
    if n:
        avg = np.average(coeffs)
        std = np.std(coeffs)
        err = 1.96 * std / np.sqrt(n)
    else:
        # Empty sample: np.average/np.std would warn and divide by zero.
        avg = std = err = float("nan")
    print("avg: {:0.3f}\tstd: {:0.3f}\tsamples: {}\terr: {:0.3f}".format(
        avg, std, n, err))
def _get_CC(Graph, H, output_path):
    """Write each node's clustering coefficient to a CSV file.

    One row is written per node, with the columns ``username`` (taken from
    ``H.GetKey(node_id)``) and ``CC`` (the clustering coefficient).
    """
    coeff_by_node = snap.TIntFltH()
    snap.GetNodeClustCf(Graph, coeff_by_node)

    records = [
        {'username': H.GetKey(node_id), 'CC': coeff_by_node[node_id]}
        for node_id in coeff_by_node
    ]

    frame = pd.DataFrame(records)
    ordered = frame[['username', 'CC']]
    ordered.to_csv(output_path, index=False, encoding='utf-8')
def print_ccf_of_random_node(G, GName):
    """Print the clustering coefficient of one randomly chosen node of ``G``.

    A random position in the coefficient table is drawn with
    ``snap.TInt.GetRnd`` and the entry at that position is reported. The
    last ten characters of ``GName`` are dropped from the printed name.
    """
    coeff_by_node = snap.TIntFltH()
    snap.GetNodeClustCf(G, coeff_by_node)
    chosen_position = snap.TInt.GetRnd(len(coeff_by_node))

    for position, node_id in enumerate(coeff_by_node):
        if position != chosen_position:
            continue
        print("Clustering coefficient of random node {0} in {1}: {2}".format(
            node_id, GName[:-10], coeff_by_node[node_id]))
        break
def DegClustDist(graph):
    """
    Calculates the degree - clustering coefficient distribution of a given
    graph. The graph must be in snap format.

    Parameters
    ----------
    graph : an instance of SNAP.TUNGraph()/SNAP.TNGraph() for undirected/directed graph
        The graph for which the degree - clustering coefficient distribution
        is to be calculated

    Returns
    -------
    coeff_dist : 2D numpy array with shape (2, :)
        The mean clustering coefficient per degree, sorted by degree.
        coeff_dist[0, :] : degree value
        coeff_dist[1, :] : mean clustering coefficient of the nodes with a given degree
    """
    deg_coeff = {}

    stopwatch = StopWatch(0.5)
    n_nodes = graph.GetNodes()
    for j, node in enumerate(graph.Nodes()):
        # float() keeps the progress fraction meaningful under Python 2,
        # where int / int would truncate it to 0.
        PrintProgress(stopwatch(), float(j) / n_nodes)
        deg = node.GetDeg()
        clust = snap.GetNodeClustCf(graph, node.GetId())
        deg_coeff.setdefault(deg, []).append(clust)

    # Fill the columns in ascending degree order so no re-sort is needed.
    coeff_dist = np.zeros((2, len(deg_coeff)))
    for i, deg in enumerate(sorted(deg_coeff)):
        coeff_dist[0, i] = deg
        coeff_dist[1, i] = np.mean(deg_coeff[deg])

    PrintProgress(True, 1)
    print('\n')
    return coeff_dist
def get_clustering_coefficient(graph, gtype='snap'):
    """Return node ids and their clustering coefficients as two arrays.

    :param graph: a snap graph (``gtype == 'snap'``) or a networkx graph
        (``gtype == 'nx'``).
    :param gtype: ``'snap'`` or ``'nx'``; any other value yields two empty
        arrays.
    :return: tuple ``(ids, coefficients)`` with dtypes ``uint32`` and
        ``float32``. For networkx graphs the ids are in ``np.sort`` order.
    """
    nids = []
    ccs = []

    if gtype == 'snap':
        coeff_table = snap.TIntFltH()
        snap.GetNodeClustCf(graph, coeff_table)
        for node_id in coeff_table:
            nids.append(node_id)
            ccs.append(coeff_table[node_id])
    elif gtype == 'nx':
        coeff_table = nx.clustering(graph)
        ordered_ids = np.sort(list(coeff_table.keys()))
        nids.extend(ordered_ids)
        ccs.extend(coeff_table[node_id] for node_id in ordered_ids)

    id_array = np.asarray(nids, dtype='uint32')
    coeff_array = np.asarray(ccs, dtype='float32')
    return id_array, coeff_array
def calcClusteringCoefficientSingleNode(Node, Graph):
    """
    :param - Node: node from snap.PUNGraph object. Graph.Nodes() will give an
                   iterable of nodes in a graph
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: local clustering coefficient of Node
    """
    node_id = Node.GetId()
    return snap.GetNodeClustCf(Graph, node_id)
def q4_2_aux():
    """Print size, density and a sampled average clustering coefficient of HDN.

    The graph is loaded from the binary file ``HDN.graph``. The average
    clustering coefficient is estimated from 1000 node ids drawn with
    ``GetRndNId``.
    """
    sample_size = 1000
    HDN = snap.TUNGraph.Load(snap.TFIn('HDN.graph'))

    edge_total = HDN.GetEdges()
    vertex_total = HDN.GetNodes()
    print("nodes in HDN: %s" % (vertex_total,))
    print("edged in HDN: %s" % (edge_total,))

    possible_edges = vertex_total * (vertex_total - 1) / 2
    density = float(edge_total) / possible_edges
    print("density of the graph is %s" % (density,))

    coeff_total = sum(
        (snap.GetNodeClustCf(HDN, HDN.GetRndNId()) for _ in range(sample_size)),
        0.0)
    mean_coeff = float(coeff_total) / 1000
    print("average Clustering coeficient in HDN: %s" % (mean_coeff,))
def print_connectivity_clustering(G):
    """
    Prints the average clustering coefficient, number of triads in subgraph G
    Also prints clustering coefficient and number of triads for random nodes
    Also prints the number of edges that participate in at least one triad

    Both clustering coefficients are rounded to 4 decimal places. The two
    per-node statistics are taken from two independently drawn random nodes.
    """
    GraphClustCoeff = snap.GetClustCf(G)
    print("Average clustering coefficient:", round(GraphClustCoeff, 4))
    print("Number of triads:", snap.GetTriads(G))

    NId = G.GetRndNId()
    # Round to 4 digits like the average above; a bare round() would
    # collapse the coefficient to 0 or 1.
    print(f'Clustering coefficient of random node {NId}:',
          round(snap.GetNodeClustCf(G, NId), 4))

    NId = G.GetRndNId()
    print(f'Number of triads random node {NId} participates:',
          snap.GetNodeTriads(G, NId))

    print('Number of edges that participate in at least one triad:',
          snap.GetTriadEdges(G))
def get_each_nodes_ClusteringCofficient(graph):
    """Return a ``snap.TIntFltH`` filled by ``snap.GetNodeClustCf`` for ``graph``."""
    coeff_by_node = snap.TIntFltH()
    snap.GetNodeClustCf(graph, coeff_by_node)
    return coeff_by_node
def get_clustering_coeff(UGraph, attributes):
    """Store a node-id-indexed array of clustering coefficients.

    ``attributes['ClusteringCoeff']`` is set to a float array in which
    position ``i`` holds the clustering coefficient of node id ``i``.

    The array has at least ``UGraph.GetNodes()`` entries. It is enlarged to
    ``max_id + 1`` when node ids are not the contiguous range ``0..N-1``, so
    indexing by id cannot run past the end. Positions with no node stay 0.

    :param UGraph: snap graph.
    :param attributes: mapping updated in place; nothing is returned.
    """
    node_ids = [NI.GetId() for NI in UGraph.Nodes()]

    size = UGraph.GetNodes()
    if node_ids:
        # Sizing by node count alone overflows when ids have gaps.
        size = max(size, max(node_ids) + 1)

    coeff = np.zeros((size, ))
    for i in node_ids:
        coeff[i] = snap.GetNodeClustCf(UGraph, i)
    attributes['ClusteringCoeff'] = coeff
import numpy as np
import snap
import matplotlib.pyplot as plt
import random

# Payoffs of the two behaviours and the resulting adoption threshold.
payoff_a = 2
payoff_b = 3
# float() keeps the ratio fractional even where int / int truncates.
thersold = float(payoff_a) / (payoff_a + payoff_b)

nodestatusList = []
nodedegreeList = []
nodeaffectList = []
nodeinitialList = []

G = snap.LoadEdgeList(snap.PUNGraph, "data/soc-Slashdot0902.txt", 0, 1, '\t')

# Plot the clustering-coefficient distribution of the graph.
snap.PlotClustCf(G, "project_cluster_coeff",
                 "Undirected graph - clustering coefficient")

# Print the (degree, clustering coefficient) pairs filled in by
# GetClustCfAll, then the first element of its return value.
DegToCCfV = snap.TFltPrV()
result = snap.GetClustCfAll(G, DegToCCfV)
for item in DegToCCfV:
    print("degree: %d, clustering coefficient: %f" %
          (item.GetVal1(), item.GetVal2()))
print("average clustering coefficient", result[0])

# Dump "<node id> <clustering coefficient>" for every node.
NIdCCfH = snap.TIntFltH()
snap.GetNodeClustCf(G, NIdCCfH)
# The with-block guarantees the file is flushed and closed.
with open("clustering_list.txt", "w") as clusterfile:
    for item in NIdCCfH:
        # The coefficient is a float; "%d" would truncate it to 0 or 1.
        clusterfile.write("%d %f\r\n" % (item, NIdCCfH[item]))
# Select the Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Load the Steam-Sweden edge list as a snap.PUNGraph.
Graph = snap.LoadEdgeList(snap.PUNGraph, "Steam-Sweden.txt", 0, 1)

# Count the triads of the graph.
# NOTE(review): the sample-size argument is -1, which presumably means
# "use all nodes" rather than random sampling - confirm in the SNAP docs.
NumTriads = snap.GetTriads(Graph, -1)
print "Number of triads: " + str(NumTriads)

# Pick one random node id; it is reused for both per-node statistics below.
rm_node = Graph.GetRndNId()

# Clustering coefficient of the random node.
rm_clus_coeff = snap.GetNodeClustCf(Graph, rm_node)
print "Clustering coefficient of random node ", rm_node, " in Steam-Sweden: ", rm_clus_coeff

# Number of triads the same random node participates in.
num_triads = snap.GetNodeTriads(Graph, rm_node)
print "Number of triads of node ", rm_node, " participates in ", num_triads, " triads"

# Accumulate graph-wide totals from the per-node triples filled in by
# GetTriads: Val2 is summed into ClosedTriads and Val3 into OpenTriads.
TriadV = snap.TIntTrV()
snap.GetTriads(Graph, TriadV, -1)
OpenTriads = 0
ClosedTriads = 0
for triple in TriadV:
    OpenTriads += triple.Val3()
    ClosedTriads += triple.Val2()
def graphStructure(elistName, elistPath):
    """
    Compute the graph properties requested by the assignment.

    Args:
        elistName (str) -> Input elist name, used to name the plot files
        elistPath (pathlib.Path) -> Input elist from which the graph is built

    Return:
        RESULTS (dict) -> Dictionary containing results for the different
                          subparts of the assignment
    """
    sampleSizes = (10, 100, 1000)
    subGraph = snap.LoadEdgeList(snap.PUNGraph, elistPath, 0, 1)

    # Part 1 (Size of the network)
    RESULTS = {
        'nodeCount': subGraph.GetNodes(),
        'edgeCount': subGraph.GetEdges(),
    }

    # Part 2 (Degree of nodes in the network)
    degreeById = [(node.GetId(), node.GetDeg()) for node in subGraph.Nodes()]
    degrees = [deg for _, deg in degreeById]
    maxDegree = max([0] + degrees)
    degree7Count = sum(1 for deg in degrees if deg == 7)
    maxDegreeNodes = [nid for nid, deg in degreeById if deg == maxDegree]

    # Since it is an undirected graph, in/out degree is unimportant
    snap.PlotOutDegDistr(subGraph, f"deg_dist_{elistName}")

    RESULTS['maxDegree'] = maxDegree
    RESULTS['maxDegreeNodes'] = ','.join(str(nid) for nid in maxDegreeNodes)
    RESULTS['degree7Count'] = degree7Count

    # Part 3 (Paths in the network)
    # Full diameter, approximated for each sample size
    fullDiameters = {
        size: snap.GetBfsFullDiam(subGraph, size, False)
        for size in sampleSizes
    }
    fullStats = meanVariance(fullDiameters.values())
    fullDiameters['mean'], fullDiameters['variance'] = fullStats
    RESULTS['fullDiameters'] = fullDiameters

    # Effective diameter, approximated for each sample size
    effDiameters = {
        size: snap.GetBfsEffDiam(subGraph, size, False)
        for size in sampleSizes
    }
    effStats = meanVariance(effDiameters.values())
    effDiameters['mean'], effDiameters['variance'] = effStats
    RESULTS['effDiameters'] = effDiameters

    snap.PlotShortPathDistr(subGraph, f"shortest_path_{elistName}")

    # Part 4 (Components of the network)
    bridgeVec = snap.TIntPrV()
    artPointVec = snap.TIntV()
    RESULTS['fractionLargestConnected'] = snap.GetMxSccSz(subGraph)
    snap.GetEdgeBridges(subGraph, bridgeVec)
    snap.GetArtPoints(subGraph, artPointVec)
    RESULTS['edgeBridges'] = len(bridgeVec)
    RESULTS['articulationPoints'] = len(artPointVec)

    snap.PlotSccDistr(subGraph, f"connected_comp_{elistName}")

    # Part 5 (Connectivity and clustering in the network)
    RESULTS['avgClusterCoefficient'] = snap.GetClustCf(subGraph, -1)
    RESULTS['triadCount'] = snap.GetTriadsAll(subGraph, -1)[0]

    # Two random nodes are drawn up front, in this order, from Rnd.
    nodeX = subGraph.GetRndNId(Rnd)
    nodeY = subGraph.GetRndNId(Rnd)
    clusterCfX = snap.GetNodeClustCf(subGraph, nodeX)
    RESULTS['randomClusterCoefficient'] = (nodeX, clusterCfX)
    triadsY = snap.GetNodeTriads(subGraph, nodeY)
    RESULTS['randomNodeTriads'] = (nodeY, triadsY)
    RESULTS['edgesTriads'] = snap.GetTriadEdges(subGraph)

    snap.PlotClustCf(subGraph, f"clustering_coeff_{elistName}")

    return RESULTS
def get_clustering_coefficient(G, n):
    """Return ``snap.GetNodeClustCf`` for node id ``n`` of graph ``G``."""
    coefficient = snap.GetNodeClustCf(G, n)
    return coefficient
def net_structure(dataset_dir, output_dir, graph_type, metric, net, alg):
    """Compute clustering-coefficient statistics for detected communities.

    For every threshold directory under ``dataset_dir + net`` that has no
    ``<output_dir><threshold>.json`` yet, every community file of that
    threshold is processed: the matching ego graph is loaded from the
    hard-coded ``graphs_dir``, the clustering coefficient of every node
    listed in the community file is collected, and the means are summarised
    through ``calc.calcular`` / ``calc.calcular_full``. One JSON line per
    threshold is appended to the output file.

    ``metric`` and ``alg`` are not referenced in this function body.
    Written for Python 2 (``long``, ``print e``).
    """
    os.system('clear')
    print(
        "\n######################################################################\n"
    )
    print("\nScript para cálculo do coef_clust das comunidades detectadas\n")

    # Directory holding the ego graphs as "<ego_id>.edge_list" files.
    graphs_dir = "/home/amaury/graphs_hashmap_infomap_without_weight/" + str(
        net) + "/" + str(graph_type) + "/"

    if not os.path.exists(graphs_dir):
        print("Diretório não encontrado: " + str(graphs_dir))
    else:
        print(
            "\n######################################################################\n"
        )
        print(
            "\nScript para cálculo do Coeficiente de Clustering das comunidades detectadas - Rede "
            + str(net) + "\n")

        if not os.path.isdir(dataset_dir + str(net) + "/"):
            print("Diretório com avaliações da rede " + str(net) +
                  " não encontrado: " + str(dataset_dir + str(net) + "/"))
        else:
            for threshold in os.listdir(dataset_dir + str(net) + "/"):
                # Skip thresholds whose result file already exists.
                if os.path.isfile(str(output_dir) + str(threshold) + ".json"):
                    print("Arquivo de destino já existe. " + str(output_dir) +
                          str(threshold) + ".json")
                else:
                    coef_clust = [
                    ]  # List with the mean coefficient of each graph
                    coef_clust_data = {
                    }  # Dictionary with the ego's coef_clust for each community
                    i = 0
                    for file in os.listdir(dataset_dir + str(net) + "/" +
                                           str(threshold) + "/"):
                        i += 1
                        # The ego id is the file name without ".txt".
                        ego_id = file.split(".txt")
                        ego_id = long(ego_id[0])
                        communities = [
                        ]  # Stores the communities of the ego network
                        m_file = [
                        ]  # list of coefficients of the communities of ego i
                        try:
                            G = snap.LoadEdgeList(
                                snap.PNGraph,
                                str(graphs_dir) + str(ego_id) + ".edge_list",
                                0, 1
                            )  # load from a text file - may require a separator, e.g.: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
                            n_edges = G.GetEdges(
                            )  # Number of edges in the graph
                            if n_edges == 0:
                                # A graph without edges contributes a 0.
                                a = 0
                                m_file.append(a)
                            else:
                                try:
                                    # Each line of the community file lists
                                    # the space-separated members of one
                                    # community.
                                    with open(
                                            dataset_dir + str(net) + "/" +
                                            str(threshold) + "/" + str(file),
                                            'r') as f:
                                        for line in f:
                                            comm = [
                                            ]  # List to store the communities
                                            a = line.split(' ')
                                            for item in a:
                                                if item != "\n":
                                                    comm.append(item)
                                            communities.append(comm)
                                except Exception as e:
                                    print(
                                        "\nERRO - Impossível carregar as comunidades: "
                                        + dataset_dir + str(net) + "/" +
                                        str(threshold) + "/" + str(file) +
                                        "\n")
                                    print e

                                # Clustering coefficient of every node
                                # listed in any community of this ego.
                                _cf = []
                                for comm in communities:
                                    if comm is not None:
                                        for nodeId in comm:
                                            if nodeId is not None:
                                                _cf.append(
                                                    snap.GetNodeClustCf(
                                                        G, int(nodeId))
                                                )  # Clustering coefficient
                                result = calc.calcular(_cf)
                                m_file.append(result['media'])
                                print("Clustering Coef para o ego " + str(i) +
                                      " (" + str(file) + "): " +
                                      str(result['media']))
                                print
                        except Exception as e:
                            print(
                                "\nERRO - Impossível carregar o grafo para o ego: "
                                + str(ego_id) + " -- " + str(graphs_dir) +
                                str(ego_id) + ".edge_list\n")
                            print e

                        # Summarise this ego and record it for the threshold.
                        _m_file = calc.calcular(m_file)
                        coef_clust_data[ego_id] = m_file
                        if _m_file is not None:
                            coef_clust.append(_m_file['media'])
                            print(
                                str(graph_type) + " - Rede: " + str(net) +
                                " - Threshold: " + str(threshold) +
                                " - Coef_Clustering para o ego " + str(i) +
                                " (" + str(file) + "): %5.3f" %
                                (_m_file['media']))
                            print(
                                "######################################################################"
                            )

                    # Aggregate statistics over all egos of this threshold.
                    M = calc.calcular_full(coef_clust)
                    if M is not None:
                        overview = {
                            'threshold': threshold,
                            'coef_clust': M,
                            'coef_clust_data': coef_clust_data
                        }
                        print(
                            "\n######################################################################\n"
                        )
                        print(
                            "Rede: %s --- Threshold: %s --- Coef_Clust: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f"
                            % (net, threshold, M['media'], M['variancia'],
                               M['desvio_padrao']))
                        print(
                            "\n######################################################################\n"
                        )

                    # NOTE(review): `overview` is assigned only inside the
                    # `if M is not None` branch above - confirm it cannot be
                    # unbound or stale when this check runs.
                    if overview is not None:
                        with open(
                                str(output_dir) + str(threshold) + ".json",
                                'a+') as f:
                            f.write(json.dumps(overview) + "\n")
    print(
        "\n######################################################################\n"
    )
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    """Compute clustering-coefficient statistics over all ego networks.

    Every file in ``dataset_dir`` is loaded as an edge list (directed when
    ``IsDir is True``, undirected otherwise). The mean of its per-node
    clustering coefficients is collected, and the statistics returned by
    ``calc.calcular_full`` are written to
    ``<output_dir><net>_clustering_coef.json`` and ``.txt`` and printed.
    Nothing is computed if the JSON file already exists.

    ``weight`` is not referenced in this function body.
    """
    print(
        "\n######################################################################\n"
    )
    if os.path.isfile(str(output_dir) + str(net) + "_clustering_coef.json"):
        print("Arquivo já existe: " + str(output_dir) + str(net) +
              "_clustering_coef.json")
    else:
        print("Dataset clustering coefficient - " + str(dataset_dir))

        cf = []  # Mean clustering coefficient per ego network
        gcf = []  # Mean using the global option
        n = []  # list with the number of vertices of each ego network
        e = []  # list with the number of edges of each ego network
        i = 0

        for file in os.listdir(dataset_dir):
            i += 1
            print(
                str(output_dir) + str(net) + "/" + str(file) +
                " - Calculando propriedades para o ego " + str(i) + ": " +
                str(file))
            if IsDir is True:
                G = snap.LoadEdgeList(
                    snap.PNGraph, dataset_dir + file, 0, 1
                )  # load from a text file - may require a separator, e.g.: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
            else:
                G = snap.LoadEdgeList(
                    snap.PUNGraph, dataset_dir + file, 0, 1
                )  # load from a text file - may require a separator, e.g.: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
            # G.Dump()
            # time.sleep(5)

            #####################################################################################

            n.append(G.GetNodes())  # Number of vertices
            e.append(G.GetEdges())  # Number of edges
            n_nodes = G.GetNodes()
            n_edges = G.GetEdges()

            #####################################################################################
            # Using the local option - returns the same result as the global one
            if n_edges == 0:
                # A graph without edges contributes a coefficient of 0.
                a = 0
                cf.append(a)
                print("Nenhuma aresta encontrada para a rede-ego " + str(i) +
                      " - (" + str(file))
            else:
                NIdCCfH = snap.TIntFltH()
                snap.GetNodeClustCf(G, NIdCCfH)
                _cf = []
                for item in NIdCCfH:
                    _cf.append(NIdCCfH[item])  # Clustering coefficient
                result = calc.calcular(_cf)
                cf.append(result['media'])
                print("Clustering Coef para o ego " + str(i) + " (" +
                      str(file) + "): " + str(result['media']))
                print

            #####################################################################################
            # Using the global option - returns the same result as the local one
            #
            # if n_edges == 0:
            #     a = 0
            #     gcf.append(a)
            # else:
            #     GraphClustCoeff = snap.GetClustCf (G)
            #     gcf.append(GraphClustCoeff)
            #     print "Clustering coefficient: %f" % GraphClustCoeff
            #     print

        #####################################################################################
        # Aggregate the per-ego means and persist the summary.
        CF = calc.calcular_full(cf)

        overview = {}
        overview['ClusteringCoefficient'] = CF

        with open(str(output_dir) + str(net) + "_clustering_coef.json",
                  'w') as f:
            f.write(json.dumps(overview))

        with open(str(output_dir) + str(net) + "_clustering_coef.txt",
                  'w') as f:
            f.write(
                "\n######################################################################\n"
            )
            f.write(
                "Clustering Coef: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
                % (CF['media'], CF['variancia'], CF['desvio_padrao']))
            f.write(
                "\n######################################################################\n"
            )

        print(
            "\n######################################################################\n"
        )
        print(
            "Clustering Coef: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
            % (CF['media'], CF['variancia'], CF['desvio_padrao']))
        print(
            "\n######################################################################\n"
        )
# HITS: fill the hub and authority tables and dump each with write().
NIdHubH = snap.TIntFltH()
NIdAuthH = snap.TIntFltH()
snap.GetHits(G, NIdHubH, NIdAuthH)
write(NIdHubH, "hub.txt")
write(NIdAuthH, "auth.txt")

# Betweenness centrality; only the per-node table is written out.
Nodes = snap.TIntFltH()
Edges = snap.TIntPrFltH()
snap.GetBetweennessCentr(G, Nodes, Edges, 1.0)
write(Nodes, "between.txt")

# Per-node eccentricity and clustering coefficient, with a progress
# message every 10000 nodes.
rows = []
for i, node in enumerate(G.Nodes()):
    if i % 10000 == 0:
        print "on iteration {}".format(i)
    nid = node.GetId()
    ecc = snap.GetNodeEcc(G, nid)
    clust = snap.GetNodeClustCf(G, nid)
    rows.append([nid, ecc, clust])

# One "nid,ecc,clust" line per node.
with open(base + "ecc_clust.txt", 'w') as f:
    for row in rows:
        f.write(",".join(map(str, row)) + "\n")

# Articulation points, one id per line.
ArtNIdV = snap.TIntV()
snap.GetArtPoints(G, ArtNIdV)
with open(base + "art.txt", "w") as f:
    for NI in ArtNIdV:
        f.write("{}\n".format(NI))
def get_graph_overview(G, Gd=None):
    '''
    Print an overview of graph G: its degree counts, clustering coefficient
    by degree, approximate diameter and component-size distributions.

    G here is an undirected graph. Gd is an optional second graph; when it
    is given, the size distribution of its strongly connected components is
    printed as well. Nothing is returned.
    '''
    # degree distribution
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, CntV)
    max_deg = 0
    for item in CntV:
        max_deg = max(max_deg, item.GetVal1())
    print('max_deg =  %s' % (max_deg,))

    # deg_cnt[d] = number of nodes with degree d
    deg_cnt = np.zeros(max_deg + 1)
    for item in CntV:
        deg_cnt[item.GetVal1()] = item.GetVal2()
    print(deg_cnt)

    # clustering coefficient distribution
    cf = snap.GetClustCf(G)
    print('average cf = %s' % (cf,))

    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(G, NIdCCfH)
    ccf_sum = np.zeros(max_deg + 1)
    for item in NIdCCfH:
        ccf_sum[G.GetNI(item).GetDeg()] += NIdCCfH[item]

    # Average coefficient per degree, only for degrees that occur.
    ccf_y = []
    for i in range(max_deg + 1):
        if deg_cnt[i] != 0:
            ccf_sum[i] /= deg_cnt[i]
            ccf_y.append(ccf_sum[i])
    print(ccf_y)

    # diameter and shortest path distribution
    diam = snap.GetBfsFullDiam(G, 100)
    print(diam)

    # wcc and scc size distribution
    WccSzCnt = snap.TIntPrV()
    snap.GetWccSzCnt(G, WccSzCnt)
    print('Distribution of wcc:')
    for item in WccSzCnt:
        print('%s %s' % (item.GetVal1(), item.GetVal2()))

    # Identity test: "!= None" can be overridden by the object's operators.
    if Gd is not None:
        print('Distribution of scc:')
        ComponentDist = snap.TIntPrV()
        snap.GetSccSzCnt(Gd, ComponentDist)
        for item in ComponentDist:
            print('%s %s' % (item.GetVal1(), item.GetVal2()))
import snap
import sys
import numpy as np
import matplotlib

# Select the Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

input_file = sys.argv[1]
Graph = snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1)

# Clustering coefficient of every node, keyed by node id.
cluster = dict()
for node in Graph.Nodes():
    clustering = snap.GetNodeClustCf(Graph, node.GetId())
    cluster[node.GetId()] = clustering

# Distinct coefficient values.
clc = set(cluster.values())

# Write "<node id> : <coefficient>" lines, highest coefficient first and
# ties broken by descending node id.
with open(sys.argv[1] + '.clustering.txt', 'w+') as fp:
    # Index the (id, coefficient) pair explicitly: tuple-unpacking lambda
    # parameters are a syntax error on Python 3.
    for p in sorted(cluster.items(),
                    key=lambda kv: (kv[1], kv[0]),
                    reverse=True):
        fp.write("%s : %s\n" % p)

clc = sorted(clc, key=float, reverse=True)

#plotting clustering coefficient
plt.plot(np.arange(1, len(clc) + 1, 1), clc, 'b.')
plt.xlabel('Rank')
plt.ylabel('Clustering Coefficient')
# Number of articulation points of Graph1.
Art_points = snap.TIntV()
snap.GetArtPoints(Graph1, Art_points)
art = Art_points.Len()
print("Number of articulation points: ", art)

# Plot the connected-component size distribution; the output name is
# derived from file_name.
str2 = "connected_comp_" + file_name
snap.PlotSccDistr(Graph1, str2,
                  "Distribution of sizes of connected components")

# 5. Connectivity and clustering in the network
avg_cc = snap.GetClustCf(Graph1, -1)
print("Average clustering coefficient: %0.4f" % avg_cc)

triads = snap.GetTriads(Graph1, -1)
print("Number of triads: ", triads)

# Clustering coefficient of one node drawn from Rnd.
random1 = Graph1.GetRndNId(Rnd)
node_cc = snap.GetNodeClustCf(Graph1, random1)
print("Clustering coefficient of random node %d: %0.4f" % (random1, node_cc))

# Triad count of a second, independently drawn node.
random2 = Graph1.GetRndNId(Rnd)
node_triads = snap.GetNodeTriads(Graph1, random2)
print("Number of triads random node %d participates: %d" %
      (random2, node_triads))

triad_edges = snap.GetTriadEdges(Graph1, -1)
print("Number of edges that participate in at least one triad: ", triad_edges)

# Plot the clustering-coefficient distribution.
str3 = "clustering_coeff_" + file_name
snap.PlotClustCf(Graph1, str3, "The distribution of clustering coefficient")
def getClusteringCoeff(Graph):
    """Return the clustering coefficient of every node, ordered by node id.

    The real node ids of ``Graph`` are used, in ascending order. For a graph
    whose ids are exactly ``0..N-1`` the result is indexed by node id; for
    any other id set the list still holds one value per existing node.

    :param Graph: snap graph.
    :return: list of clustering coefficients.
    """
    # range(Graph.GetNodes()) would query ids that may not exist.
    node_ids = sorted(NI.GetId() for NI in Graph.Nodes())
    return [snap.GetNodeClustCf(Graph, node_id) for node_id in node_ids]
def main():
    """Compute heuristic features for the labelled nodes and save them as CSV.

    The network is read three ways: through ``load_graph`` (from the .mat
    file), into SNAP and into NetworkX (both from the edge list). For every
    SNAP node whose id is in ``indices`` the in-degree, PageRank, clustering
    coefficient, hub and authority scores, average neighbor degree and
    eccentricity are collected. The table is written once to
    ``<network without .mat>_node_heuristic_features.csv``.
    """
    parser = ArgumentParser("node_heu",
                            formatter_class=ArgumentDefaultsHelpFormatter,
                            conflict_handler='resolve')
    # Required arguments
    parser.add_argument("--network", type=str, required=True,
                        help='The path and name of the .mat file containing the adjacency matrix and node labels of the input network')
    parser.add_argument("--edgelist", type=str, required=True,
                        help='The path and name of the edgelist file with no weights containing the edgelist of the input network')
    parser.add_argument("--dataset", type=str, required=True,
                        help='The name of your dataset (used for output)')
    # Optional arguments
    parser.add_argument("--adj_matrix_name", default='network',
                        help='The name of the adjacency matrix inside the .mat file')
    parser.add_argument("--label_matrix_name", default='group',
                        help='The name of the labels matrix inside the .mat file')
    args = parser.parse_args()
    print (args)

    mat, A, graph, labels_matrix, labels_count, indices = load_graph(
        args.network, args.adj_matrix_name, args.label_matrix_name)

    s_time = time.time()

    # Load edgelist as undirected graph in SNAP
    G = snap.LoadEdgeList(snap.PUNGraph, args.edgelist)
    print ("Loading graph in SNAP ... {}".format(str(args.edgelist)))

    # Load edgelist for networkx
    G_NETX = nx.read_edgelist(args.edgelist)
    print ("Loading graph in NetworkX .... {}".format(str(args.edgelist)))

    # Get Average Neighbor Degree from NetworkX (only time NetworkX is used)
    AvgNeighDe = nx.average_neighbor_degree(G_NETX)

    # Calculate Page Rank
    p_time = time.time()
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print ("Finished in Page rank in {}".format(str(time.time()-p_time)))

    # Calculate Hub and Authority Scores
    h_time = time.time()
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print ("Finished in Hub and Auth Scores in {}".format(str(time.time()-h_time)))

    count = 0
    node_data = []
    fl_100 = time.time()
    print ("Num of nodes: {}".format(len(PRankH)))
    print ("Num of nodes with labels: {}".format(len(indices)))
    print ("Collecting other features for each node ...")
    for n in G.Nodes():
        nid = n.GetId()
        if nid in indices:
            # NetworkX keys are looked up with the string form of the id.
            node_data.append((nid, n.GetInDeg(), PRankH[nid],
                              snap.GetNodeClustCf(G, nid), NIdHubH[nid],
                              NIdAuthH[nid], AvgNeighDe[str(nid)],
                              snap.GetNodeEcc(G, nid)))
            count = count + 1
            if count % 1000 == 0:
                # Progress report with the time taken by the last batch.
                print ("Processed {} nodes".format(str(count)))
                print (time.time() - fl_100)
                fl_100 = time.time()

    # Build and save the feature table once.
    out_path = args.network.replace(".mat", "") + "_node_heuristic_features.csv"
    nhdf = pd.DataFrame(node_data,
                        columns=('NodeId', 'Degree', 'PageRankScore',
                                 'NodeClustCf', 'HubScore', 'AuthScore',
                                 'AverageNeighborDegree', 'NodeEcc'))
    nhdf.to_csv(out_path, index=False)
    print ("File saved at {}".format(out_path))
    print ("Finished in {}".format(str(time.time()-s_time)))
list(connected_component.values()), s=15) plt.xlabel("Size of Connected Components") plt.ylabel("Number of components") plt.title("Connected Component Distribution ({})".format(graph_filename[:-6])) plt.savefig(plot_filedir) # [5] Connectivity and Clustering in the Network cluster_coeff = snap.GetClustCf(G, -1) print("Average clustering coefficient: {}".format(round(cluster_coeff, 4))) num_triads = snap.GetTriads(G, -1) print("Number of triads: {}".format(num_triads)) node_id = G.GetRndNId(Rnd) node_cluster_coeff = snap.GetNodeClustCf(G, node_id) print("Clustering coefficient of random node {}: {}".format( node_id, round(node_cluster_coeff, 4))) node_id = G.GetRndNId(Rnd) node_num_triads = snap.GetNodeTriads(G, node_id) print("Number of triads random node {} participates: {}".format( node_id, node_num_triads)) triad_edge = snap.GetTriadEdges(G) print("Number of edges that participate in at least one triad: {}".format( triad_edge)) cf_dist = snap.TFltPrV() coeff = snap.GetClustCf(G, cf_dist, -1) degree_coeff = {}
snap.GetTriads(email_enron_subgraph, -1)) if (sub_graph_name == "p2p-Gnutella04-subgraph"): # Computing no of Triads print "Number of Triads in p2p-Gnutella04-subgraph :" + str( snap.GetTriads(p2p_gnutella04_subgraph, -1)) # Task 1.2.5.3 if (sub_graph_name == "soc-Epinions1-subgraph"): # Clustering coeffiecient of a random node Rand = snap.TRnd(42) Rand.Randomize() RandNode1 = soc_epinions1_subgraph.GetRndNId(Rand) print "Clustering coefficient of random node " + str( RandNode1) + " in soc-Epinions1-subgraph : " + str( round(snap.GetNodeClustCf(soc_epinions1_subgraph, RandNode1), 4)) if (sub_graph_name == "cit-HepPh-subgraph"): # Clustering coeffiecient of a random node Rand = snap.TRnd(42) Rand.Randomize() RandNode2 = cit_heph_subgraph.GetRndNId(Rand) print "Clustering coefficient of random node " + str( RandNode2) + " in cit-HepPh-subgraph : " + str( round(snap.GetNodeClustCf(cit_heph_subgraph, RandNode2), 4)) if (sub_graph_name == "email-Enron-subgraph"): # Clustering coeffiecient of a random node Rand = snap.TRnd(42) Rand.Randomize() RandNode3 = email_enron_subgraph.GetRndNId(Rand) print "Clustering coefficient of random node " + str( RandNode3) + " in email-Enron-subgraph : " + str(
#b EdgeBridgeV = snap.TIntPrV() snap.GetEdgeBridges(fbsgel, EdgeBridgeV) print("Number of edge bridges:", len(EdgeBridgeV)) #c ArtNIdV = snap.TIntV() snap.GetArtPoints(fbsgel, ArtNIdV) print("Number of articulation points:", len(ArtNIdV)) #d Plot snap.PlotSccDistr(fbsgel, "connected_comp_" + str(subgraph_name), "connected_comp_" + str(subgraph_name)) #Q5 #a print("Average clustering coefficient:", round(snap.GetClustCf(fbsgel, -1), 4)) #b print("Number of triads:", snap.GetTriads(fbsgel, -1)) #c RnId = fbsgel.GetRndNId(Rnd) print("Clustering coefficient of random node " + str(RnId) + ":", round(snap.GetNodeClustCf(fbsgel, RnId), 4)) #d print("Number of triads random node " + str(RnId) + " participates:", snap.GetNodeTriads(fbsgel, RnId)) #e print("Number of edges that participate in at least one triad:", snap.GetTriadEdges(fbsgel, -1)) #f Plot snap.PlotClustCf(fbsgel, "clustering_coeff_" + str(subgraph_name), "clustering_coeff_" + str(subgraph_name))
def main():
    """Print structural statistics of a subgraph and generate its plots.

    The edge list named by ``sys.argv[1]`` is loaded from the ``subgraphs``
    directory under the current working directory. Node/edge counts, degree
    statistics, sampled diameters, component statistics and clustering
    statistics are printed. Four SNAP plots are then written into the
    ``plots`` directory, every non-PNG file there is deleted, and the PNG
    files are renamed by plot type. The working directory is changed
    several times and restored to the starting directory at the end.
    """
    parentDir = os.getcwd()

    # Load the graph relative to ./subgraphs, then return to the start dir.
    os.chdir(parentDir + "/subgraphs")
    sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    # Name used for plot files: the part of the argument before the first ".".
    subGraphName = sys.argv[1].split(".")[0]
    os.chdir(parentDir)

    #### 1 ########
    # Size of the network; nodes are counted by iterating over them.
    node_count = 0
    for node in sub_graph.Nodes():
        node_count = node_count + 1
    printWithOutNewLine("Number of nodes:", node_count)
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    # Degree statistics.
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))
    # Take one maximum-degree node, look up its degree, then collect every
    # node that has that same degree.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = 0
    for pair in nodeDegPairs:
        if (pair.GetVal1() == rndMaxDegNId):
            maxDegVal = pair.GetVal2()
            break
    maxDegNodes = []
    for pair in nodeDegPairs:
        if (pair.GetVal2() == maxDegVal):
            maxDegNodes.append(pair.GetVal1())
    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    # Approximate full diameter for 10, 100 and 1000 sampled start nodes.
    sampledFullDiam = []
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 10, False))
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 100, False))
    sampledFullDiam.append(snap.GetBfsFullDiam(sub_graph, 1000, False))
    sampledFullDiamStats = []
    sampledFullDiamStats.append(round(statistics.mean(sampledFullDiam), 4))
    sampledFullDiamStats.append(round(statistics.variance(sampledFullDiam), 4))
    printWithOutNewLine("Approximate full diameter by sampling 10 nodes:",
                        sampledFullDiam[0])
    printWithOutNewLine("Approximate full diameter by sampling 100 nodes:",
                        sampledFullDiam[1])
    printWithOutNewLine("Approximate full diameter by sampling 1000 nodes:",
                        sampledFullDiam[2])
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    # Approximate effective diameter; each sample is rounded to 4 decimals
    # before the mean and variance are taken.
    sampledEffDiam = []
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 10, False), 4))
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 100, False), 4))
    sampledEffDiam.append(round(snap.GetBfsEffDiam(sub_graph, 1000, False), 4))
    sampledEffDiamStats = []
    sampledEffDiamStats.append(round(statistics.mean(sampledEffDiam), 4))
    sampledEffDiamStats.append(round(statistics.variance(sampledEffDiam), 4))
    printWithOutNewLine("Approximate effective diameter by sampling 10 nodes:",
                        sampledEffDiam[0])
    printWithOutNewLine(
        "Approximate effective diameter by sampling 100 nodes:",
        sampledEffDiam[1])
    printWithOutNewLine(
        "Approximate effective diameter by sampling 1000 nodes:",
        sampledEffDiam[2])
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    # Components of the network.
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))
    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    # Connectivity and clustering.
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))
    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))
    # One random node is used for both per-node statistics. Its coefficient
    # is read from the table computed for all nodes.
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)
    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))
    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))
    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    # The cwd is parentDir here, so 'plots' resolves to parentDir/plots.
    if not os.path.isdir('plots'):
        os.makedirs('plots')
    os.chdir(parentDir + "/plots")
    plotsDir = os.getcwd()
    snap.PlotOutDegDistr(sub_graph, subGraphName,
                         subGraphName + " Subgraph Degree Distribution")
    snap.PlotShortPathDistr(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Shortest Path Lengths Distribution")
    snap.PlotSccDistr(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Connected Components Size Distribution")
    snap.PlotClustCf(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Clustering Coefficient Distribution")

    # Keep only the PNG images.
    # NOTE(review): this deletes every non-.png file in the plots directory,
    # not only the files produced by this run - confirm that is intended.
    files = os.listdir(plotsDir)
    for file in files:
        if not file.endswith(".png"):
            os.remove(os.path.join(plotsDir, file))

    # Rename "<kind>.<name>.<ext>" files to "<prefix><name>.<ext>"; names
    # with exactly two dot-separated parts are left untouched.
    plots = os.listdir(plotsDir)
    filePrefix = "filename"
    for file in plots:
        nameSplit = file.split(".")
        if (len(nameSplit) == 2):
            continue
        # NOTE(review): filePrefix is not reset per file, so an unrecognised
        # first part reuses the prefix chosen for the previous file.
        if (nameSplit[0] == "ccf"):
            filePrefix = "clustering_coeff_"
        elif (nameSplit[0] == "outDeg"):
            filePrefix = "deg_dist_"
        elif (nameSplit[0] == "diam"):
            filePrefix = "shortest_path_"
        elif (nameSplit[0] == "scc"):
            filePrefix = "connected_comp_"
        os.rename(file, filePrefix + nameSplit[1] + "." + nameSplit[2])
    os.chdir(parentDir)