def can_combine_cluster2(cl1, cl2):
    combine = False
    temp_graph1 = G.subgraph(cl1)
    temp_graph2 = G.subgraph(cl2)
    temp_graph_all = G.subgraph(cl1 + cl2)
    if len(cl1) >= len(cl2):
        common_elements = list(set(cl1).intersection(set(cl2)))
        if len(common_elements) > 0.8*len(cl2):
            combine = True
            #print common_elements
    else:
        common_elements = list(set(cl2).intersection(set(cl1)))
        if len(common_elements) > 0.8*len(cl1):
            combine = True
    clustering_coeff_1 = nx.average_clustering(temp_graph1)
    clustering_coeff_2 = nx.average_clustering(temp_graph2)
    clustering_coeff_all = nx.average_clustering(temp_graph_all)
    #print cl1
    #print cl2
    #print (str)(clustering_coeff_1) + " " + (str)(clustering_coeff_2) + " " + (str)(clustering_coeff_all)
    #print " "
    if combine:
        if (clustering_coeff_all >= .8*clustering_coeff_1) and (clustering_coeff_all >= 0.8*clustering_coeff_2):
            return True
    else:
        if (clustering_coeff_all >= clustering_coeff_1) and (clustering_coeff_all >= clustering_coeff_2):
            return True
    return False
def compare_graphs(graph):
    n = nx.number_of_nodes(graph)
    m = nx.number_of_edges(graph)
    k = np.mean(list(nx.degree(graph).values()))
    erdos = nx.erdos_renyi_graph(n, p=m/float(n*(n-1)/2))
    barabasi = nx.barabasi_albert_graph(n, m=int(k)-7)
    small_world = nx.watts_strogatz_graph(n, int(k), p=0.04)
    print(' ')
    print('Compare the number of edges')
    print(' ')
    print('My network: ' + str(nx.number_of_edges(graph)))
    print('Erdos: ' + str(nx.number_of_edges(erdos)))
    print('Barabasi: ' + str(nx.number_of_edges(barabasi)))
    print('SW: ' + str(nx.number_of_edges(small_world)))
    print(' ')
    print('Compare average clustering coefficients')
    print(' ')
    print('My network: ' + str(nx.average_clustering(graph)))
    print('Erdos: ' + str(nx.average_clustering(erdos)))
    print('Barabasi: ' + str(nx.average_clustering(barabasi)))
    print('SW: ' + str(nx.average_clustering(small_world)))
    print(' ')
    print('Compare average path length')
    print(' ')
    print('My network: ' + str(nx.average_shortest_path_length(graph)))
    print('Erdos: ' + str(nx.average_shortest_path_length(erdos)))
    print('Barabasi: ' + str(nx.average_shortest_path_length(barabasi)))
    print('SW: ' + str(nx.average_shortest_path_length(small_world)))
    print(' ')
    print('Compare graph diameter')
    print(' ')
    print('My network: ' + str(nx.diameter(graph)))
    print('Erdos: ' + str(nx.diameter(erdos)))
    print('Barabasi: ' + str(nx.diameter(barabasi)))
    print('SW: ' + str(nx.diameter(small_world)))
def MvsD(A, Au, M, D):
    """docstring for MvsD"""
    # Calculate the number of nodes
    print("Number of nodes in A : " + str(len(A.nodes())))
    print("Number of nodes in Au : " + str(len(Au.nodes())))
    # Calculate the number of links
    print("Number of links in A : " + str(len(A.edges())))
    print("Number of links in Au : " + str(len(Au.edges())))
    t = nx.average_clustering(Au)
    print("network clustering coefficient for Au : " + str(t))
    print("")
    # Calculate the number of nodes
    print("Number of nodes in M : " + str(len(M.nodes())))
    print("Number of nodes in D : " + str(len(D.nodes())))
    t = nx.average_clustering(M)
    print("network clustering coefficient for M : " + str(t))
    t = nx.average_clustering(D)
    print("network clustering coefficient for D : " + str(t))
    MavgD = float(sum(M.degree().values()))/float(len(M.nodes()))
    print("Connectivity M : " + str(MavgD))
    DavgD = float(sum(D.degree().values()))/float(len(D.nodes()))
    print("Connectivity D : " + str(DavgD))
    pass
def t_t_cc(path=r"d:\data\9.txt"):
    rstr = ''
    g = nx.Graph()
    g = read_file_txt(g, path)
    w = [14, 13, 12, 6]
    print nx.average_clustering(g)
    for each in w:
        R = gRa(g, each)
        pg = r_perturbR(g, R)
        rstr = rstr + '{0:8},{1:10.4}'.format(each, nx.average_clustering(pg))
        rstr = rstr + '\n'
    try:
        path = path.replace('9', '9_cc')
        f = open(path, 'w')
    except:
        print "int Create File error"
    p = np.array(w)/14.0
    for each in p:
        pg = r_perturbS(g, each)
        rstr = rstr + '{0:8},{1:10.4}'.format(each, nx.average_clustering(pg))
        rstr = rstr + '\n'
    f.write(rstr)
    f.close()
def Type2AlmostCompleteGraph(n, m):
    if (BinomialCoefficient(n - 2, 2) + 4 <= m) and (m <= BinomialCoefficient(n - 1, 2) + 1):
        first_candidate = nx.complete_graph(n - 2)
        remaining_edges = m - BinomialCoefficient(n - 2, 2)
        first_candidate.add_edge(n - 2, 0)
        first_candidate.add_edge(n - 2, 1)
        for vertex_index in range(remaining_edges - 2):
            first_candidate.add_edge(n - 1, vertex_index)
        first_coefficient = nx.average_clustering(first_candidate)
        second_candidate = nx.complete_graph(n - 2)
        second_candidate.add_edge(n - 2, n - 1)
        remaining_edges = m - BinomialCoefficient(n - 2, 2) - 1
        number_of_common_neighbors = remaining_edges / 2
        for vertex_index in range(number_of_common_neighbors):
            second_candidate.add_edge(vertex_index, n - 2)
            second_candidate.add_edge(vertex_index, n - 1)
        if (remaining_edges - 2 * number_of_common_neighbors) == 1:
            second_candidate.add_edge(vertex_index + 1, n - 2)
        second_coefficient = nx.average_clustering(second_candidate)
        if first_coefficient > second_coefficient:
            G = first_candidate.copy()
        else:
            G = second_candidate.copy()
        return G
def can_combine_cluster(cl1, cl2):
    global G
    cl1_int = []
    cl2_int = []
    for string in cl1:
        cl1_int.append(int(string))
    for string in cl2:
        cl2_int.append(int(string))
    temp_graph1 = G.subgraph(cl1_int)
    temp_graph2 = G.subgraph(cl2_int)
    temp_graph_all = G.subgraph(cl1_int + cl2_int)
    clustering_coeff_1 = nx.average_clustering(temp_graph1)
    clustering_coeff_2 = nx.average_clustering(temp_graph2)
    clustering_coeff_all = nx.average_clustering(temp_graph_all)
    # print (str)(clustering_coeff_1) + " " + (str)(clustering_coeff_2) + " " + (str)(clustering_coeff_all)
    if clustering_coeff_1 == 1:
        clustering_coeff_1 = 0.96
    if clustering_coeff_2 == 1:
        clustering_coeff_2 = 0.96
    if (clustering_coeff_1 == 0) and (clustering_coeff_2 == 0):
        return False
    fraction = 0.95
    if (clustering_coeff_all > fraction * clustering_coeff_1) and (
        clustering_coeff_all > fraction * clustering_coeff_2
    ):
        # print "combine"
        return True
    return False
def t_GrQc_cc(path=r"d:\data\CA-GrQc.txt"):
    rstr = ''
    g = nx.Graph()
    g = read_file_txt(g, path)
    w = [14496, 13454, 12394, 9782]
    for each in w:
        R = gRa(g, each)
        pg = r_perturbR(g, R)
        rstr = rstr + '{0:8},{1:10.4}'.format(each, nx.average_clustering(pg))
        rstr = rstr + '\n'
    try:
        path = path.replace('GrQc', 'GrQc_cc')
        f = open(path, 'w')
    except:
        print "int readFileTxt open error"
    p = np.array(w)/14496.0
    for each in p:
        pg = r_perturbS(g, each)
        rstr = rstr + '{0:8},{1:10.4}'.format(each, nx.average_clustering(pg))
        rstr = rstr + '\n'
    f.write(rstr)
    f.close()
def algorithm(w1, w2, w3, w4, G1, G2, G3, G4):
    try:
        cc = np.array([nx.average_clustering(G1, weight='weight'),
                       nx.average_clustering(G2, weight='weight'),
                       nx.average_clustering(G3, weight='weight'),
                       nx.average_clustering(G4, weight='weight')])
        spl = np.array([nx.average_shortest_path_length(G1, weight='weight'),
                        nx.average_shortest_path_length(G2, weight='weight'),
                        nx.average_shortest_path_length(G3, weight='weight'),
                        nx.average_shortest_path_length(G4, weight='weight')])
        nds = np.array([nx.number_of_nodes(G1), nx.number_of_nodes(G2),
                        nx.number_of_nodes(G3), nx.number_of_nodes(G4)])
        edgs = np.array([nx.number_of_edges(G1), nx.number_of_edges(G2),
                         nx.number_of_edges(G3), nx.number_of_edges(G4)])
        if valid(cc):
            cc = stats.zscore(cc)
        else:
            cc = np.array([.1, .1, .1, .1])
        cc = cc - min(cc) + .1
        if valid(spl):
            spl = stats.zscore(spl)
        else:
            spl = np.array([.1, .1, .1, .1])
        spl = spl - min(spl) + .1
        if valid(nds):
            nds = stats.zscore(nds)
        else:
            nds = np.array([.1, .1, .1, .1])
        nds = nds - min(nds) + .1
        if valid(edgs):
            edgs = stats.zscore(edgs)
        else:
            edgs = np.array([.1, .1, .1, .1])
        edgs = edgs - min(edgs) + .1
        r1 = (w1*cc[0] + w2*spl[0] + w3*nds[0] + w4*edgs[0]) * 1000
        r2 = (w1*cc[1] + w2*spl[1] + w3*nds[1] + w4*edgs[1]) * 1000
        r3 = (w1*cc[2] + w2*spl[2] + w3*nds[2] + w4*edgs[2]) * 1000
        r4 = (w1*cc[3] + w2*spl[3] + w3*nds[3] + w4*edgs[3]) * 1000
        d = {'Player 1:': r1, 'Player 2:': r2, 'Player 3:': r3, 'Player 4:': r4}
        rank = sorted(d.items(), key=lambda x: x[1], reverse=True)
        return ["USAU RANKINGS",
                str(rank[0][0]) + " " + str(int(rank[0][1])),
                str(rank[1][0]) + " " + str(int(rank[1][1])),
                str(rank[2][0]) + " " + str(int(rank[2][1])),
                str(rank[3][0]) + " " + str(int(rank[3][1]))]
    except:
        return ["Unable to compute rankings! Need data", "Player 1", "Player 2", "Player 3", "Player 4"]
def getCoherenceMeasure(essay):
    graph = makeWordGraph(essay)
    # obtain clustering coefficient
    clustCoeffList = nx.clustering(graph)
    #####
    print getScore(clustCoeffList, graph)
    print nx.average_clustering(graph)
def t_facebook_cc(path=r"d:\data\facebook1.txt"):
    rstr = ''
    g = nx.Graph()
    g = read_file_txt(g, path)
    w = [1945, 1294, 860, 643]
    for each in w:
        R = gRa(g, each)
        pg = r_perturbR(g, R)
        rstr = rstr + '{0:8},{1:10.4}'.format(each, nx.average_clustering(pg))
        rstr = rstr + '\n'
    try:
        path = path.replace('book1', 'book1_cc')
        f = open(path, 'w')
    except:
        print "int readFileTxt open error"
    p = np.array(w)/4813.0
    for each in p:
        pg = r_perturbS(g, each)
        rstr = rstr + '{0:8},{1:10.4}'.format(each, nx.average_clustering(pg))
        rstr = rstr + '\n'
    f.write(rstr)
    f.close()
def main():
    tempo_dir = "../corpus-local/tempo-txt"
    file_regex = ".*\.txt"
    G = build_graph(tempo_dir, file_regex)
    """
    ccs = nx.clustering(G)
    avg_clust = sum(ccs.values()) / len(ccs)
    """
    print tempo_dir
    print "\tThere are " + str(len(G.nodes())) + " nodes."
    print "\tThere are " + str(len(G.edges())) + " edges."
    print "\tClustering coefficient : " + str(nx.average_clustering(G))
    print "\tAverage shortest path length"
    for g in nx.connected_component_subgraphs(G):
        print "\t\t" + str(nx.average_shortest_path_length(g))
    kompas_dir = "../corpus-local/kompas-txt"
    G = build_graph(kompas_dir, file_regex)
    print kompas_dir
    print "\tThere are " + str(len(G.nodes())) + " nodes."
    print "\tThere are " + str(len(G.edges())) + " edges."
    print "\tClustering coefficient : " + str(nx.average_clustering(G))
    print "\tAverage shortest path length"
    for g in nx.connected_component_subgraphs(G):
        print "\t\t" + str(nx.average_shortest_path_length(g))
def t_Gnutella_cc(path=r"d:\data\p2p-Gnutella08.txt"):
    rstr = ''
    g = nx.Graph()
    g = read_file_txt(g, path)
    w = [20777, 18700, 17995, 17023]
    for each in w:
        R = gRa(g, each)
        pg = r_perturbR(g, R)
        rstr = rstr + '{0:8},{1:10.4}'.format(each, nx.average_clustering(pg))
        rstr = rstr + '\n'
    try:
        path = path.replace('p2p-Gnutella', 'GrQcp2p-Gnutella_cc')
        f = open(path, 'w')
    except:
        print "int Create File error"
    p = np.array(w)/20777.0
    for each in p:
        pg = r_perturbS(g, each)
        rstr = rstr + '{0:8},{1:10.4}'.format(each, nx.average_clustering(pg))
        rstr = rstr + '\n'
    f.write(rstr)
    f.close()
def check_and_merge_clusters(index):
    global clusters
    global G
    given_cluster = []
    total_clusters = len(clusters)
    cluster_coeff_all = [0]*total_clusters
    cluster_coeff_temp = [0]*total_clusters
    for string in clusters[index]:
        given_cluster.append(int(string))
    given_graph = G.subgraph(given_cluster)
    clustering_coeff_given = nx.average_clustering(given_graph)
    temp_index = 0
    while temp_index < total_clusters:
        temp_cluster = []
        for string in clusters[temp_index]:
            temp_cluster.append(int(string))
        temp_graph = G.subgraph(temp_cluster)
        temp_graph_all = G.subgraph(temp_cluster + given_cluster)
        clustering_coeff_all = nx.average_clustering(temp_graph_all)
        clustering_coeff_temp = nx.average_clustering(temp_graph)
        cluster_coeff_all[temp_index] = clustering_coeff_all
        cluster_coeff_temp[temp_index] = clustering_coeff_temp
        temp_index = temp_index + 1
    # Find the index with highest coefficient and combine them
    max_index = cluster_coeff_all.index(max(cluster_coeff_all))
    if clustering_coeff_given > .94:
        clustering_coeff_given = 0.94
    if cluster_coeff_temp[max_index] > .94:
        cluster_coeff_temp[max_index] = 0.94
    if (cluster_coeff_all[max_index] >= .95*clustering_coeff_given) and (cluster_coeff_all[max_index] >= .95*cluster_coeff_temp[max_index]):
        combine_cluster(index, max_index)
def test_clustering(size):
    print("Barabasi-Albert:")
    ba = networkx.barabasi_albert_graph(1000, 4)
    print("Clustering: ", networkx.average_clustering(ba))
    print("Average length: ", networkx.average_shortest_path_length(ba))
    print("Watts-Strogatz:")
    ws = networkx.watts_strogatz_graph(size, 4, 0.001)
    print("Clustering: ", networkx.average_clustering(ws))
    print("Average length: ", networkx.average_shortest_path_length(ws))
def gen_graph_stats(graph):
    G = nx.read_graphml(graph)
    stats = {}
    edges, nodes = 0, 0
    for e in G.edges_iter():
        edges += 1
    for n in G.nodes_iter():
        nodes += 1
    stats['Edges'] = (edges, 'The number of edges within the Graph')
    stats['Nodes'] = (nodes, 'The number of nodes within the Graph')
    print "%i edges, %i nodes" % (edges, nodes)
    # Accessing the highest degree node
    center, degree = sorted(G.degree().items(), key=itemgetter(1), reverse=True)[0]
    stats['Center Node'] = ('%s: %0.5f' % (center, degree),
                            'The center most node in the graph, which has the highest degree')
    hairball = nx.subgraph(G, [x for x in nx.connected_components(G)][0])
    print "Average shortest path: %0.4f" % nx.average_shortest_path_length(hairball)
    stats['Average Shortest Path Length'] = (nx.average_shortest_path_length(hairball), '')
    # print "Center: %s" % G[center]
    # print "Shortest Path to Center: %s" % p
    print "Degree: %0.5f" % degree
    stats['Degree'] = (degree, 'The node degree is the number of edges adjacent to that node.')
    print "Order: %i" % G.number_of_nodes()
    stats['Order'] = (G.number_of_nodes(), 'The number of nodes in the graph.')
    print "Size: %i" % G.number_of_edges()
    stats['Size'] = (G.number_of_edges(), 'The number of edges in the graph.')
    print "Clustering: %0.5f" % nx.average_clustering(G)
    stats['Average Clustering'] = (nx.average_clustering(G), 'The average clustering coefficient for the graph.')
    print "Transitivity: %0.5f" % nx.transitivity(G)
    stats['Transitivity'] = (nx.transitivity(G), 'The fraction of all possible triangles present in the graph.')
    part = community.best_partition(G)
    # values = [part.get(node) for node in G.nodes()]
    # nx.draw_spring(G, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False)
    # plt.show()
    mod = community.modularity(part, G)
    print "modularity: %0.5f" % mod
    stats['Modularity'] = (mod, 'The modularity of a partition of a graph.')
    knn = nx.k_nearest_neighbors(G)
    print knn
    stats['K Nearest Neighbors'] = (knn, 'The average degree connectivity of the graph.\nThe average degree connectivity is the average nearest neighbor degree of nodes with degree k. For weighted graphs, an analogous measure can be computed using the weighted average neighbors degree.')
    return G, stats
def get_average_cluster_coefficient(filename):
    import networkx as nx
    threshold = 0
    f = open(filename[:-4] + '_average_cc.dat', 'w')
    for i in range(0, 101):
        threshold = float(i)/100
        G = get_threshold_matrix(filename, threshold)
        print 'threshold: %f, average cluster coefficient: %f' % (threshold, nx.average_clustering(G))
        f.write("%f\t%f\n" % (threshold, nx.average_clustering(G)))
    f.close()
def get_small_worldness(filename):
    import networkx as nx
    threshold = 0
    f = open(filename[:-4] + '_small_worldness.dat', 'w')
    for i in range(0, 101):
        threshold = float(i)/100
        G = get_threshold_matrix(filename, threshold)
        ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))
        cluster = nx.average_clustering(G)
        ER_cluster = nx.average_clustering(ER_graph)
        transi = nx.transitivity(G)
        ER_transi = nx.transitivity(ER_graph)
        print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' % (threshold, cluster, ER_cluster, transi, ER_transi)
        f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster))
        components = nx.connected_component_subgraphs(G)
        ER_components = nx.connected_component_subgraphs(ER_graph)
        values = []
        ER_values = []
        for i in range(len(components)):
            if nx.number_of_nodes(components[i]) > 1:
                values.append(nx.average_shortest_path_length(components[i]))
        for i in range(len(ER_components)):
            if nx.number_of_nodes(ER_components[i]) > 1:
                ER_values.append(nx.average_shortest_path_length(ER_components[i]))
        if len(values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(values)/len(values)))
        if len(ER_values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(ER_values)/len(ER_values)))
        f.write("\t%f\t%f" % (transi, ER_transi))
        if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) > 0:
            S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
        else:
            S_WS = 0.
        if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) > 0:
            S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
        else:
            S_Delta = 0.
        f.write("\t%f\t%f" % (S_WS, S_Delta))
        f.write("\n")
    f.close()
    print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity"
def test_random_reference():
    G = nx.connected_watts_strogatz_graph(50, 6, 0.1, seed=rng)
    Gr = random_reference(G, niter=1, seed=rng)
    C = nx.average_clustering(G)
    Cr = nx.average_clustering(Gr)
    assert_true(C > Cr)
    assert_raises(nx.NetworkXError, random_reference, nx.Graph())
    assert_raises(nx.NetworkXNotImplemented, random_reference, nx.DiGraph())
    H = nx.Graph(((0, 1), (2, 3)))
    Hl = random_reference(H, niter=1, seed=rng)
def get_small_worldness(G, thr):
    f = open(out_prfx + 'small_worldness.dat', 'a')
    g = open(out_prfx + 'cc_trans_ER.dat', 'a')
    #g.write('r(thre.)\t\cc_A\tcc_ER\ttran_A\ttran_ER\n')
    ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))  # Erdos-Renyi binomial random graph generator (N, D: density)
    cluster = nx.average_clustering(G)            # clustering coef. of whole network
    ER_cluster = nx.average_clustering(ER_graph)  # cc of random graph
    transi = nx.transitivity(G)
    ER_transi = nx.transitivity(ER_graph)
    g.write("%f\t%f\t%f\t%f\t%f\n" % (thr, cluster, ER_cluster, transi, ER_transi))
    f.write("%f\t%f\t%f" % (thr, cluster, ER_cluster))
    components = nx.connected_component_subgraphs(G)
    ER_components = nx.connected_component_subgraphs(ER_graph)
    values = []
    ER_values = []
    for i in range(len(components)):
        if nx.number_of_nodes(components[i]) > 1:
            values.append(nx.average_shortest_path_length(components[i]))
    for i in range(len(ER_components)):
        if nx.number_of_nodes(ER_components[i]) > 1:
            ER_values.append(nx.average_shortest_path_length(ER_components[i]))
    if len(values) == 0:
        f.write("\t0.")
    else:
        f.write("\t%f" % (sum(values)/len(values)))  # path length
    if len(ER_values) == 0:
        f.write("\t0.")
    else:
        f.write("\t%f" % (sum(ER_values)/len(ER_values)))
    f.write("\t%f\t%f" % (transi, ER_transi))
    if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) > 0:
        S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
        S_WS = 0.
    if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) > 0:
        S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
        S_Delta = 0.
    f.write("\t%f\t%f" % (S_WS, S_Delta))  # S_WS ~ small-worldness
    f.write("\n")
    f.close()
    g.close()
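Both get_small_worldness variants above compute the same quantity: the network's clustering coefficient and characteristic path length, each normalized by the corresponding value of an Erdős–Rényi graph of the same size and density. Written out (C and L are the average clustering coefficient and mean shortest-path length of the network, C_ER and L_ER those of the random reference; this is the S_WS value the code writes to file, labelled "S-Watts-Strogatz" in the other variant's column header):

    S_WS = (C / C_ER) / (L / L_ER)

S_Delta is the analogous ratio with transitivity substituted for the average clustering coefficient.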
def run(G, cut_pct, iterations=10):
    print nx.average_clustering(G)
    nodes = G.nodes()
    edges = G.edges()
    for i in range(iterations):
        np.random.shuffle(nodes)
        cut_count = int(cut_pct*len(nodes))
        selected_nodes = nodes[0:-cut_count]
        not_selected_nodes = set(nodes) - set(selected_nodes)
        not_selected_edges = G.subgraph(not_selected_nodes).edges()
        H = G.subgraph(nodes)
        H.remove_edges_from(not_selected_edges)
        H.remove_nodes_from(list(set(not_selected_nodes) & set(nx.isolates(H))))
        print nx.average_clustering(H)
def random_25kc(G0, nswap=1, max_tries=100):
    """
    Only check the degree-dependent clustering coefficients of the four swapped nodes and their neighbours.
    """
    if nswap > max_tries:
        raise nx.NetworkXError("Number of swaps > number of tries allowed.")
    if len(G0) < 4:
        raise nx.NetworkXError("Graph has less than four nodes.")
    G = copy.deepcopy(G0)
    n = 0
    swapcount = 0
    while swapcount < nswap:
        (u, x) = random.sample(G.nodes(), 2)
        v = random.choice(list(G[u]))
        y = random.choice(list(G[x]))
        if G.degree(v) != G.degree(y) or v == y or len([u, v, x, y]) < 4:
            continue  # if v and y do not have the same degree, choose again
        n += 1
        if (y not in G.neighbors(u)) and (v not in G.neighbors(x)) and ((u, v) in G.edges()) and ((x, y) in G.edges()):
            G.add_edge(u, y)
            G.add_edge(x, v)
            G.remove_edge(u, v)
            G.remove_edge(x, y)
            swapcount += 1
            if not nx.is_connected(G):
                G.add_edge(u, v)
                G.add_edge(x, y)
                G.remove_edge(u, y)
                G.remove_edge(x, v)
                swapcount -= 1
                continue
            l = map(lambda t: (t[1], t[0]),
                    G0.degree([u, v, x, y] + list(G[u]) + list(G[v]) + list(G[x]) + list(G[y])).items())  # list of (degree, node) tuples
            D = dict_degree_nodeslist(l)
            for i in range(len(D)):
                avcG0 = nx.average_clustering(G0, nodes=D.values()[i], weight=None, count_zeros=True)
                avcG = nx.average_clustering(G, nodes=D.values()[i], weight=None, count_zeros=True)
                i += 1
                if avcG0 != avcG:  # if the degree-dependent clustering coefficient changed, undo this swap
                    G.add_edge(u, v)
                    G.add_edge(x, y)
                    G.remove_edge(u, y)
                    G.remove_edge(x, v)
                    swapcount -= 1
                    break
        if n >= max_tries:
            e = ('Maximum number of swap attempts (%s) exceeded ' % n +
                 'before desired swaps achieved (%s).' % nswap)
            print e
            break
    return G
def compareAvgClusteringCoeff(masterGraph, wordGraph, worksheet, row):
    avgClusteringMaster = nx.average_clustering(masterGraph)
    avgClusteringWord = nx.average_clustering(wordGraph)
    #worksheet.write(row, 1, avgClusteringMaster)
    #worksheet.write(row, 2, avgClusteringWord)
    result = False
    if (avgClusteringMaster >= avgClusteringWord):
        result = True
    #worksheet.write(row, 3, result)
    if result == True:
        return 1
    else:
        return -1
def Attributes_of_Graph(G):
    print "*Statistic attributes of graphs:"
    print "N", nx.number_of_nodes(G)
    print "M", nx.number_of_edges(G)
    print "C", nx.average_clustering(G)
    #print "<d>", nx.average_shortest_path_length(G)
    print "r", nx.degree_assortativity_coefficient(G)
    degree_list = list(G.degree_iter())
    max_degree = 0
    min_degree = 0
    avg_degree_1 = 0.0
    avg_degree_2 = 0.0
    for node in degree_list:
        avg_degree_1 = avg_degree_1 + node[1]
        avg_degree_2 = avg_degree_2 + node[1]*node[1]
        if node[1] > max_degree:
            max_degree = node[1]
        if node[1] < min_degree:
            min_degree = node[1]
    #end for
    avg_degree = avg_degree_1/len(degree_list)
    avg_degree_square = (avg_degree_2/len(degree_list)) / (avg_degree*avg_degree)
    print "<k>", avg_degree
    print "k_max", max_degree
    print "H", avg_degree_square
    print "DH", float(max_degree-min_degree)/G.number_of_nodes()
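For reference, the H and DH values printed by Attributes_of_Graph above are computed directly from the degree list: H is the ratio commonly read as a degree-heterogeneity index,

    H = <k^2> / <k>^2

and DH is the degree range (k_max - k_min) normalized by the number of nodes. Note that, as written, min_degree never moves from its initial value of 0 (a degree can never be negative), so DH effectively reduces to k_max / N.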
def calc_clustering_coefficient(g, dest_file):
    """
    calc_clustering_coefficient(g)
    Calculate & plot clustering coefficient of the graph g and write the data
    to the created data output file
    :param g: graph as source
    :return: ---
    """
    func_intro = "\n\nClustering Co-Efficient ..."
    logging.info(cs_ref, func_intro)
    print func_intro
    with open(dest_file, "a") as dat_file:
        dat_file.write(func_intro)
    cce = nx.clustering(g)  # calculate clustering co-efficient
    with open(dest_file, "a") as dat_file:
        dat_file.write("\n\tClustering Coefficients for nodes in graph = \t" + str(cce))
    average_cce = nx.average_clustering(g)
    with open(dest_file, "a") as dat_file:
        dat_file.write("\n\tAverage Clustering Coefficient for graph = \t" + str(average_cce))
    for edge in g.edges():  # plot clustering co-efficient
        if floor(edge[0] / 5.) != floor(edge[1] / 5.):
            if random.random() < 0.95:
                g.remove_edge(edge[0], edge[1])
    plt.figure(3)
    fixed_pos = {1: (0, 0), 10: (1, 1), 30: (1, 0), 50: (0, 1)}
    pos = nx.spring_layout(g, fixed=fixed_pos.keys(), pos=fixed_pos)
    nx.draw_networkx(g, pos=pos)
    plt.title("Clustering Co-efficient" + src_file)
    plt.savefig("plots/cs1_clustering_coefficient.png")
    plt.show()
def analyze_first_level_panels():
    results = {}
    for d in first_level_topic_list:
        print "\n*********DESCRIPTOR: " + first_level_topic_list[d] + "(" + str(d) + ")"
        G = build_panel_network_by_descriptor(d)
        print "\nDESCRIPTOR: " + first_level_topic_list[d] + "(" + str(d) + ")"
        print "Nodes:", G.number_of_nodes()
        print "Edges:", G.number_of_edges()
        res_clique = analize_cliques(G)
        res_degree = analize_degrees(G)
        res_weight = analize_edges(G)
        d_final = dict(res_clique)
        d_final.update(res_degree)
        d_final.update(res_weight)
        d_final['id'] = d
        d_final['avg_clustering'] = nx.average_clustering(G)
        results[first_level_topic_list[d]] = d_final
    print "Writing json..."
    json.dump(results, open('./networks/first_level_panels_analysis.json', 'w'), indent=2)
    print "Writing csvs..."
    df = DataFrame(results)
    df.to_csv('./networks/first_level_panels_analysis.csv')
    dfinv = df.transpose()
    dfinv.to_csv('./networks/first_level_panels_analysis_inv.csv')
def printStats(filename):
    '''
    Converts a json adjacency list into networkx to calculate and print the graph's
    - average clustering coefficient
    - overall clustering coefficient
    - maximum diameter
    - average diameter
    - number of partitions using community.best_partition
    - modularity of community.best_partition
    '''
    g = makeGraphFromJSON(filename)
    print "Average Clustering Coefficient: %f" % nx.average_clustering(g)
    print "Overall Clustering Coefficient: %f" % nx.transitivity(g)
    connected_subgraphs = list(nx.connected_component_subgraphs(g))
    largest = max(nx.connected_component_subgraphs(g), key=len)
    print "# Connected Components: %d" % len(connected_subgraphs)
    print "  Maximal Diameter: %d" % nx.diameter(largest)
    print "  Average Diameter: %f" % nx.average_shortest_path_length(largest)
    # Find partition that maximizes modularity using Louvain's algorithm
    part = community.best_partition(g)
    print "# Partitions: %d" % (max(part.values()) + 1)
    print "Louvain Modularity: %f" % community.modularity(part, g)
def ws_calc(path):
    """
    Given a path to a graph file generated by the GMM, calculate C(p) and L(p).
    """
    G = nx.read_graphml(path)
    file_split = path.split('_')
    return({'p': float(file_split[4]),
            'cc': nx.average_clustering(G),
            'avg.pl': nx.average_shortest_path_length(G)})
def connected_components(self):
    """
    Returns basic statistics about the connected components of the graph.
    This includes their number, order, size, diameter, radius, average
    clustering coefficient, and transitivity, in addition to basic info
    about the largest and smallest connected components.
    """
    cc_stats = {}
    cc = nx.connected_components(self.graph.structure)
    for index, component in enumerate(cc):
        cc_stats[index] = {}
        this_cc = cc_stats[index]
        this_cc["order"] = len(component)
        this_cc["size"] = len(self.graph.structure.edges(component))
        subgraph = self.graph.structure.subgraph(component)
        this_cc["avg_cluster"] = nx.average_clustering(subgraph)
        this_cc["transitivity"] = nx.transitivity(subgraph)
        eccentricity = nx.eccentricity(subgraph)
        ecc_values = eccentricity.values()
        this_cc["diameter"] = max(ecc_values)
        this_cc["radius"] = min(ecc_values)
    return cc_stats
def get_characteristics(G, filename):
    import networkx as nx
    print 'calculating characteristics'
    n_nodes = nx.number_of_nodes(G)
    n_edges = nx.number_of_edges(G)
    n_components = nx.number_connected_components(G)
    print 'number of nodes:', n_nodes
    print 'number of edges:', n_edges
    print 'number of components:', n_components
    print 'degree histogram'
    check_sum = 0.
    degree_hist = {}
    for node in G:
        if G.degree(node) not in degree_hist:
            degree_hist[G.degree(node)] = 1
        else:
            degree_hist[G.degree(node)] += 1
    keys = degree_hist.keys()
    keys.sort()
    for item in keys:
        print item, degree_hist[item]
        check_sum += float(degree_hist[item])/float(n_nodes)
    print "check sum: %f" % check_sum
    #print 'clustering coefficient'
    print 'clustering coefficient of full network', nx.average_clustering(G)
    return 0
def whole_graph_metrics(graph, weighted=False):
    graph_metrics = {}

    # Shortest average path length
    graph_metrics['avg_shortest_path'] = \
        nx.average_shortest_path_length(graph, weight=weighted)

    # Average eccentricity
    ecc_dict = nx.eccentricity(graph)
    graph_metrics['avg_eccentricity'] = np.mean(np.array(ecc_dict.values()))

    # Average clustering coefficient
    # NOTE: Option to include or exclude zeros
    graph_metrics['avg_ccoeff'] = \
        nx.average_clustering(graph, weight=weighted, count_zeros=True)

    # Average node betweenness
    avg_node_btwn_dict = nx.betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_node_btwn'] = \
        np.mean(np.array(avg_node_btwn_dict.values()))

    # Average edge betweenness
    avg_edge_btwn_dict = nx.edge_betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_edge_btwn'] = \
        np.mean(np.array(avg_edge_btwn_dict.values()))

    # Number of isolates
    graph_metrics['isolates'] = len(nx.isolates(graph))

    return graph_metrics
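A minimal usage sketch for whole_graph_metrics, assuming the NetworkX 1.x API the function relies on (dict-returning eccentricity and centrality functions, list-returning nx.isolates). The karate-club graph is only an illustrative input; passing weighted=False simply means no edge attribute of that name exists, so every edge is effectively treated as weight 1:

import networkx as nx

# Any connected graph works here; average_shortest_path_length and
# eccentricity both require a connected graph.
karate = nx.karate_club_graph()
metrics = whole_graph_metrics(karate, weighted=False)
for name, value in sorted(metrics.items()):
    print("{}: {}".format(name, value))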
print(nx.in_degree_centrality(G))
print(nx.out_degree_centrality(G))

# Global clustering coefficient or transitivity of a graph
# Remember to convert to undirected graph
print('\nClustering coefficients of Ego users')

# Getting average clustering for the starting users
starting_users_clustering = []
for users in starting_users:
    x = nx.clustering(G.to_undirected(), users)
    starting_users_clustering.append(x)
    print(x)

print('Average clustering for G is {}'.format(
    nx.average_clustering(G.to_undirected())))

print('\nTransitivity')
print(nx.transitivity(G.to_undirected()))

# Distances: shortest path between userName1 and userName2
# print(nx.shortest_path(G, userName1, userName2))
# print(nx.shortest_path_length(G, userName1, userName2))

# Diameter and eccentricity are applicable when the graph is strongly connected
# number of strongly/weakly connected components
if nx.is_strongly_connected(G):
    print('G is strongly connected')
    print(sorted(nx.strongly_connected_components(G)))
    print(nx.number_connected_components(G))
    print(nx.eccentricity(G))
def master(struct_save_name="ProteinDict_ten_thousand", edge_type="ligands", edge_comm_num=3,
           property="processes", graph_filename="Protein-Protein_Graph_Default_Name",
           load_graph=False, print_dict_props=False, bipart_graph=False,
           bipartite_filename="Bipartite_Default_Name", show_plots=False, avg_clust=False,
           print_graph_props=False, degree_dist=False, k_clique=False, mod_max=False,
           fluid=False, louv=False, k_property=20, num_k_cliques=7, num_fluid_comms=100,
           std_val=-0.5, k_clique_opt=False, start_k_clique_opt=3, end_k_clique_opt=10,
           num_trials_k=3, opt_fluid=False, start_fluid_comms=100, end_fluid_comms=300,
           fluid_step_size=20, fluid_num_trials=3):
    Structure_Dict = {}
    Structure_Dict = hf.readDict(struct_save_name, Structure_Dict)

    # Here we print out some helpful information about the dataset we are using
    if print_dict_props == True:
        avg_ligands = hf.get_mean_property(Structure_Dict, "ligands")
        print("Average Number of Ligands:", avg_ligands)
        avg_subunits = hf.get_mean_property(Structure_Dict, "subunits")
        print("Average Number of Subunits:", avg_subunits)
        avg_functions = hf.get_mean_property(Structure_Dict, "functions")
        print("Average Number of Functions:", avg_functions)
        avg_processes = hf.get_mean_property(Structure_Dict, "processes")
        print("Average Number of Processes:", avg_processes)

        # Get total number of ligands, functions, processes and subunits
        num_ligands = len(hf.get_all_property(Structure_Dict, "ligands"))
        print("Number of Ligands:", num_ligands)
        num_subunits = len(hf.get_all_property(Structure_Dict, "subunits"))
        print("Number of Subunits:", num_subunits)
        num_functions = len(hf.get_all_property(Structure_Dict, "functions"))
        print("Number of Functions:", num_functions)
        num_processes = len(hf.get_all_property(Structure_Dict, "processes"))
        print("Number of Processes:", num_processes)

    # Here we create a bipartite graph of ligands and proteins, which can be analyzed on its own,
    # or used to create a projected graph.
    if bipart_graph == True:
        Protein_Bipartite_Graph = nx.Graph()
        struct_name_set = set()
        # Create a bipartite graph in which there are structure nodes and ligand nodes
        for (struct_name, struct) in Structure_Dict.items():
            struct_name_set.add(struct_name)
            hf.create_Edge(struct, Protein_Bipartite_Graph, property)
        print('Bipartite Nodes:', len(Protein_Bipartite_Graph.nodes()))
        print('Bipartite Edges:', len(Protein_Bipartite_Graph.edges()))
        nx.write_gml(Protein_Bipartite_Graph, bipartite_filename)

    # Here we create a new projected graph
    if load_graph == False:
        # Create a projected graph from the bipartite graph
        Protein_Graph = hf.create_projected_graph(Structure_Dict, edge_comm_num, edge_type)
        # Get the giant component of the graph
        Protein_Graph_GC = Protein_Graph.subgraph(
            sorted(nx.connected_components(Protein_Graph), key=len, reverse=True)[0])
        nx.write_gml(Protein_Graph, graph_filename)

    # If the graph has already been created, load in the graph to save time
    if load_graph == True:
        Protein_Graph = nx.read_gml(graph_filename)
        Protein_Graph_GC = Protein_Graph.subgraph(
            sorted(nx.connected_components(Protein_Graph), key=len, reverse=True)[0])

    # Print out some useful information about the graph
    if print_graph_props == True:
        print('Protein_Graph Nodes:', len(Protein_Graph.nodes()))
        print('Protein_Graph Edges:', len(Protein_Graph.edges()))
        print('Protein_Graph Num connected Components:', nx.number_connected_components(Protein_Graph))
        print('Protein_Graph Num edges in largest Components:', len(Protein_Graph_GC.edges()))
        print('Protein_Graph Num nodes in largest Components:', len(Protein_Graph_GC.nodes()))

    # K-Clique Implementation
    if k_clique == True:
        print('Begin K_Clique')
        # Create a copy of the graph, which will be used when we label nodes by community
        k_clique_graph = Protein_Graph_GC.copy()
        # You can use a predetermined k, or optimize the k for the graph
        if k_clique_opt == False:
            k_clique_comms_pre_del = nx.algorithms.community.k_clique_communities(
                Protein_Graph_GC, num_k_cliques)
            k_clique_comms_pre_del = list(list(k_clique_comms_pre_del))
        else:
            k_clique_comms_pre_del = hf.opt_k_clique(Protein_Graph_GC, start_k_clique_opt,
                                                     end_k_clique_opt, num_trials_k)
        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in k_clique_comms_pre_del]) / len(k_clique_comms_pre_del)
        # Get the graph similarity score
        K_clique_score_pre_del = hf.score_graph(k_clique_comms_pre_del, Structure_Dict, k_property,
                                                property, already_list=True)
        print(K_clique_score_pre_del, len(k_clique_comms_pre_del), avg_comm_pre_del,
              hf.num_nodes(k_clique_comms_pre_del))
        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(k_clique_comms_pre_del, Structure_Dict, k_property, property,
                          "K_Clique_" + str(k_property) + "_" + property + "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)
        # Delete some communities based on their modularity score and the standard deviation of
        # community scores in the graph
        k_clique_comms = hf.delete_comms(Protein_Graph_GC, k_clique_comms_pre_del, std_val)
        # Get the graph similarity score after deletion
        k_clique_score = hf.score_graph(k_clique_comms, Structure_Dict, k_property, property,
                                        already_list=True)
        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm) for comm in k_clique_comms]) / len(k_clique_comms)
        print(k_clique_score, len(k_clique_comms), avg_comm, hf.num_nodes(k_clique_comms))
        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(k_clique_comms, Structure_Dict, k_property, property,
                          "K_Clique_" + str(k_property) + "_" + property + "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)
        # Label nodes by community
        nx.set_node_attributes(k_clique_graph, hf.list_to_dict(k_clique_comms_pre_del), "Community")
        # Save the graph with nodes labeled by community
        nx.write_gml(
            k_clique_graph,
            "K_Clique_Protein_Protein_" + edge_type + "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End K_Clique')

    # Modularity Maximization Implementation
    if mod_max == True:
        print('Begin Modularity Maximization')
        # Create a copy of the graph, which will be used when we label nodes by community
        mod_graph = Protein_Graph_GC.copy()
        # Find communities using modularity maximization
        mod_max_comms_pre_del = nx.algorithms.community.modularity_max.greedy_modularity_communities(
            Protein_Graph_GC)
        mod_max_comms_pre_del = list(list(mod_max_comms_pre_del))
        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in mod_max_comms_pre_del]) / len(mod_max_comms_pre_del)
        # Get the graph similarity score
        mod_max_score_pre_del = hf.score_graph(mod_max_comms_pre_del, Structure_Dict, k_property,
                                               property, already_list=True)
        print(mod_max_score_pre_del, len(mod_max_comms_pre_del), avg_comm_pre_del,
              hf.num_nodes(mod_max_comms_pre_del))
        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(mod_max_comms_pre_del, Structure_Dict, k_property, property,
                          "Mod_Max" + str(k_property) + "_" + property + "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)
        # Delete some communities based on their modularity score and the standard deviation of
        # community scores in the graph
        mod_max_comms = hf.delete_comms(Protein_Graph_GC, mod_max_comms_pre_del, std_val)
        # Get the graph similarity score after deletion
        mod_max_score = hf.score_graph(mod_max_comms, Structure_Dict, k_property, property,
                                       already_list=True)
        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm) for comm in mod_max_comms]) / len(mod_max_comms)
        print(mod_max_score, len(mod_max_comms), avg_comm, hf.num_nodes(mod_max_comms))
        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(mod_max_comms, Structure_Dict, k_property, property,
                          "Mod_Max" + str(k_property) + "_" + property + "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)
        # Label nodes by community
        nx.set_node_attributes(mod_graph, hf.list_to_dict(mod_max_comms_pre_del), "Community")
        # Save the graph with nodes labeled by community
        nx.write_gml(
            mod_graph,
            "Mod_Max_Protein_Protein_" + edge_type + "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Modularity Maximization')

    # Fluid Implementation
    if fluid == True:
        print('Begin Fluid')
        # Create a copy of the graph, which will be used when we label nodes by community
        fluid_graph = Protein_Graph_GC.copy()
        # You can use a predetermined number of communities, or optimize the number of communities for the graph
        if opt_fluid == False:
            fluid_comms_pre_del = nx.algorithms.community.asyn_fluid.asyn_fluidc(
                Protein_Graph_GC, num_fluid_comms)
            fluid_comms_pre_del = list(list(fluid_comms_pre_del))
        else:
            fluid_comms_pre_del = hf.opt_fluid(Protein_Graph_GC, start_fluid_comms, end_fluid_comms,
                                               fluid_step_size, fluid_num_trials)
        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in fluid_comms_pre_del]) / len(fluid_comms_pre_del)
        # Get the graph similarity score
        fluid_score_pre_del = hf.score_graph(fluid_comms_pre_del, Structure_Dict, k_property,
                                             property, already_list=True)
        print(fluid_score_pre_del, len(fluid_comms_pre_del), avg_comm_pre_del,
              hf.num_nodes(fluid_comms_pre_del))
        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(fluid_comms_pre_del, Structure_Dict, k_property, property,
                          "Fluid" + str(k_property) + "_" + property + "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)
        # Delete some communities based on their modularity score and the standard deviation of
        # community scores in the graph
        fluid_comms = hf.delete_comms(Protein_Graph_GC, fluid_comms_pre_del, std_val)
        # Get the graph similarity score after deletion
        fluid_score = hf.score_graph(fluid_comms, Structure_Dict, k_property, property,
                                     already_list=True)
        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm) for comm in fluid_comms]) / len(fluid_comms)
        print(fluid_score, len(fluid_comms), avg_comm, hf.num_nodes(fluid_comms))
        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(fluid_comms, Structure_Dict, k_property, property,
                          "Fluid" + str(k_property) + "_" + property + "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)
        # Label nodes by community
        nx.set_node_attributes(fluid_graph, hf.list_to_dict(fluid_comms_pre_del), "Community")
        # Save the graph with nodes labeled by community
        nx.write_gml(
            fluid_graph,
            "Fluid_Protein_Protein_" + edge_type + "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Fluid')

    # Louvain Implementation
    if louv == True:
        print('Begin Louvain')
        # Create a copy of the graph, which will be used when we label nodes by community
        louv_graph = Protein_Graph_GC.copy()
        # Create communities using the Louvain method
        opt_louv = hf.optimize_louv(Protein_Graph_GC, Structure_Dict, 100, 1, property, k_property)
        louv_comm_pre_del = hf.Get_Community(opt_louv[0])
        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in louv_comm_pre_del]) / len(louv_comm_pre_del)
        # Get the graph similarity score
        louv_score_pre_del = hf.score_graph(louv_comm_pre_del, Structure_Dict, k_property,
                                            property, already_list=True)
        print(louv_score_pre_del, len(louv_comm_pre_del), avg_comm_pre_del,
              hf.num_nodes(louv_comm_pre_del))
        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(louv_comm_pre_del, Structure_Dict, k_property, property,
                          "Louv" + str(k_property) + "_" + property + "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)
        # Delete some communities based on their modularity score and the standard deviation of
        # community scores in the graph
        louv_comms = hf.delete_comms(Protein_Graph_GC, louv_comm_pre_del, std_val)
        # Get the graph similarity score after deletion
        louv_score = hf.score_graph(louv_comms, Structure_Dict, k_property, property,
                                    already_list=True)
        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm) for comm in louv_comms]) / len(louv_comms)
        print(louv_score, len(louv_comms), avg_comm, hf.num_nodes(louv_comms))
        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(louv_comms, Structure_Dict, k_property, property,
                          "Louv" + str(k_property) + "_" + property + "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)
        # Label nodes by community
        nx.set_node_attributes(louv_graph, hf.list_to_dict(louv_comm_pre_del), "Community")
        # Save the graph with nodes labeled by community
        nx.write_gml(
            louv_graph,
            "Louv_Protein_Protein_" + edge_type + "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Louvain')

    # Create a degree distribution plot and print out the expected degree of a node
    if degree_dist == True:
        x, y, expected_degree = hf.degree_dist(Protein_Graph_GC)
        print("Expected Degree:", expected_degree)
        plt.figure()
        plt.loglog(x, y, 'bo')
        plt.title("Degree distribution")
        plt.xlabel("log(degree values)")
        plt.ylabel("log(degree frequencies)")
        plt.savefig('degree_dist_' + edge_type + '.png')
        plt.show()

    # Find the average clustering coefficient of the graph
    if avg_clust == True:
        average_clustering = nx.average_clustering(Protein_Graph_GC)
        print("Average Clustering Coefficient:", average_clustering)
def network_models():
    erdos = []
    watts = []
    barabasi = []

    # generate 30 networks of each model
    print("Generating networks...")
    bar = progressbar.ProgressBar(max_value=30)
    for i in range(30):
        bar.update(i)
        erdos.append(nx.erdos_renyi_graph(500, 0.1))
        watts.append(nx.watts_strogatz_graph(1000, 10, 0.1))
        barabasi.append(nx.barabasi_albert_graph(2000, 10))
    bar.finish()

    # degree distribution (one of each)
    print("Finding degree distributions...")
    dists = {}
    dists["Erdös-Rényi"] = degree_distribution(erdos[0])
    dists["Watts-Strogatz"] = degree_distribution(watts[0])
    dists["Barabási-Albert"] = degree_distribution(barabasi[0])

    # plot
    print("Plotting...")
    sns.set()
    pp.title("Erdös-Rényi - Degree Distribution")
    pp.hist(list(erdos[0].degree().values()), dists["Erdös-Rényi"], color=colors[0])
    pp.ylabel("Frequency")
    pp.xlabel("Degree (k)")
    pp.grid(False)
    pp.savefig('plots/erdos-degree-dist.png')
    pp.clf()
    pp.title("Watts-Strogatz - Degree Distribution")
    pp.hist(list(watts[0].degree().values()), dists["Watts-Strogatz"], color=colors[1])
    pp.ylabel("Frequency")
    pp.xlabel("Degree (k)")
    pp.grid(False)
    pp.savefig('plots/watts-degree-dist.png')
    pp.clf()
    pp.title("Barabási-Albert - Degree Distribution")
    pp.hist(list(barabasi[0].degree().values()), dists["Barabási-Albert"], color=colors[2])
    pp.ylabel("Frequency")
    pp.xlabel("Degree (k)")
    pp.grid(False)
    pp.savefig('plots/barabasi-degree-dist.png')
    pp.clf()
    print("Done plotting.")

    # table
    print("Taking measures...")
    lens = {"erdos": [], "watts": [], "barabasi": []}
    degrees = {"erdos": [], "watts": [], "barabasi": []}
    clusterings = {"erdos": [], "watts": [], "barabasi": []}
    assortativities = {"erdos": [], "watts": [], "barabasi": []}
    shortest_paths = {"erdos": [], "watts": [], "barabasi": []}
    entropies = {"erdos": [], "watts": [], "barabasi": []}
    moments = {"erdos": [], "watts": [], "barabasi": []}

    print("Calculating Erdös-Rényi measurements...")
    for graph in erdos:
        lens["erdos"].append(len(graph))
        degrees["erdos"].append(average_degree(graph))
        clusterings["erdos"].append(nx.average_clustering(graph))
        assortativities["erdos"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["erdos"].append(nx.average_shortest_path_length(graph))
        entropies["erdos"].append(entropy(graph))
        moments["erdos"].append(stat_moment(graph, 2))

    print("Calculating Watts-Strogatz measurements...")
    for graph in watts:
        lens["watts"].append(len(graph))
        degrees["watts"].append(average_degree(graph))
        clusterings["watts"].append(nx.average_clustering(graph))
        assortativities["watts"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["watts"].append(nx.average_shortest_path_length(graph))
        entropies["watts"].append(entropy(graph))
        moments["watts"].append(stat_moment(graph, 2))

    print("Calculating Barabási-Albert measurements...")
    for graph in barabasi:
        lens["barabasi"].append(len(graph))
        degrees["barabasi"].append(average_degree(graph))
        clusterings["barabasi"].append(nx.average_clustering(graph))
        assortativities["barabasi"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["barabasi"].append(nx.average_shortest_path_length(graph))
        entropies["barabasi"].append(entropy(graph))
        moments["barabasi"].append(stat_moment(graph, 2))

    print("Measurements for Erdös-Rényi networks")
    # median
    print("Median of...")
    print("Number of nodes = %d" % np.median((lens["erdos"])))
    print("Degrees = %.4f" % np.median((degrees["erdos"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["erdos"])))
    print("Assortativity = %.4f" % np.median((assortativities["erdos"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["erdos"])))
    print("Shannon entropies = %.4f" % np.median((entropies["erdos"])))
    print("Second stat moments = %.4f" % np.median((moments["erdos"])))
    # deviation
    print("Standard Deviation of...")
    print("Number of nodes = %d" % np.std((lens["erdos"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["erdos"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["erdos"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["erdos"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["erdos"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["erdos"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["erdos"]), ddof=1))

    print("Measurements for Watts-Strogatz networks")
    # median
    print("Median of...")
    print("Number of nodes = %d" % np.median((lens["watts"])))
    print("Degrees = %.4f" % np.median((degrees["watts"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["watts"])))
    print("Assortativity = %.4f" % np.median((assortativities["watts"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["watts"])))
    print("Shannon entropies = %.4f" % np.median((entropies["watts"])))
    print("Second stat moments = %.4f" % np.median((moments["watts"])))
    # deviation
    print("Standard Deviation of...")
    print("Number of nodes = %d" % np.std((lens["watts"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["watts"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["watts"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["watts"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["watts"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["watts"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["watts"]), ddof=1))

    print("Measurements for Barabási-Albert networks")
    # median
    print("Median of...")
    print("Number of nodes = %d" % np.median((lens["barabasi"])))
    print("Degrees = %.4f" % np.median((degrees["barabasi"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["barabasi"])))
    print("Assortativity = %.4f" % np.median((assortativities["barabasi"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["barabasi"])))
    print("Shannon entropies = %.4f" % np.median((entropies["barabasi"])))
    print("Second stat moments = %.4f" % np.median((moments["barabasi"])))
    # deviation
    print("Standard Deviation of...")
    print("Number of nodes = %d" % np.std((lens["barabasi"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["barabasi"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["barabasi"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["barabasi"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["barabasi"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["barabasi"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["barabasi"]), ddof=1))
    if not node1 in senses:
        continue
    adjacent = [n for n in senses[node1] if n in G and n != node1]
    for node2 in adjacent:
        if node2 not in G[node1]:
            G.add_edge(node1, node2, weight=senses[node1][node2])

logging.info('Graph for %s has %d nodes and %d edges', cid, len(G.nodes()), len(G.edges()))

table_rows = ''.join([
    '<TR><TD>{:s}</TD><TD>{:f}</TD></TR>'.format(*h) for h in hypernyms[cid].items()
])
table = '<TABLE><TR><TD COLSPAN="2"><B>Hypernyms</B></TD></TR>%s</TABLE>' % table_rows

avg_C = nx.average_clustering(G, weight='weight')

gv = Graph(comment='Cluster {:s} for {:s}'.format(cid, ', '.join(hypernyms[cid])),
           encoding='utf-8', engine='sfdp', format='svg')
gv.body.append('label="Graph for {:s}, average C={:.4f}"'.format(cid, avg_C))
gv.body.append('size="10,10"')
gv.body.append('outputorder=edgesfirst')
gv.body.append('overlap=false')
gv.body.append('splines=true')
gv.node_attr.update(color='#ffffff', margin='0')
gv.edge_attr.update(color='#666666')
gv.node('Legend', label='<{:s}>'.format(table), shape='none', margin='0')
""" return len([ x for x in network.nodes_iter() if network.nodes[x]['feautures'][feature_index] > 0 ]) // network.order() == 1 if __name__ == '__main__': print("Running tests.") print("Loading network...") load_network() print("done.") failures = 0 def test(actual, expected, test_name): global failures #lol python scope try: print("testing %s..." % (test_name, )) assert actual == expected, "%s failed (%s != %s)!" % ( test_name, actual, expected) print("%s passed (%s == %s)." % (test_name, actual, expected)) except AssertionError as e: print(e) failures += 1 test(network.order(), 4039, "order") test(network.size(), 88234, "size") test(round(nx.average_clustering(network), 4), 0.6055, "clustering") print("%d tests failed." % (failures, ))
    fundamental = set(np.where(ib - newib > 0)[0])  # set of fundamentally defaulting banks
    while delta.sum() > 0.01:
        ib = newib
        newib = -maxVector(-IB, -maxVector(np.dot(ib, pi) + E, np.zeros(N)))
        delta = abs(ib - newib)
        # sum = sum + 1
        # print(sum)
    contagion = set(np.where(ib - newib > 0)[0])
    contagion = contagion.difference(fundamental)  # set of banks defaulting through contagion
    return newib, fundamental, contagion


global N  # number of banks
N = 80
G = nx.barabasi_albert_graph(N, 1)                # build a scale-free network
average_clustering = nx.average_clustering(G)     # compute the average clustering coefficient
average_degree_connectivity = nx.average_degree_connectivity(G)  # compute the average degree connectivity
degree = G.degree()                               # degree of each node
degree_histogram = nx.degree_histogram(G)         # degree distribution of the network
g = nx.to_numpy_array(G)                          # credit-relationship matrix (without lending direction)
r, R = relation(g, 0, 0)                          # credit-relationship matrix (with lending direction)
L = balanceSheet(r, 100, 2)                       # matrix of lending volumes
IL = sum(L)                                       # interbank loans
L = L.T
IB = sum(L)                                       # interbank borrowings
p = 0.4                                           # leverage
data = generateData(IL, IB, p)                    # balance-sheet data
temp = np.array(data < 0)
if True in temp:
    print('error')
shock = abs(np.random.normal(0, 0.3, (N)))        # draw a shock
plt.title("Degree rank plot of duplication divergence model") plt.ylabel("degree") plt.xlabel("rank") plt.savefig('degree_rank_duplication') degree_sequence = nx.degree_histogram(G_part) plt.figure() plt.loglog(degree_sequence, 'b-', marker='o') plt.title("Degree rank plot of partial model") plt.ylabel("degree") plt.xlabel("rank") plt.savefig('degree_rank_partial_model') # Compute the clustering coefficient for each network. Is the clustering coefficient maintained as the networks become larger? print 'The average clustering coefficient of the model divergence duplication is:', nx.average_clustering( G_dupli) print 'The average clustering coefficient of the model partial duplication is:', nx.average_clustering( G_part) # Is the clustering coefficient maintained as the networks become larger? for i in range(3000, 7000, 1000): G_dupli = partial_dupli(1, 0.2, 0.3, i) print 'The number of nodes of the network is: %d' % (i) print 'The average_clustering coefficient is:', nx.average_clustering( G_dupli) # For the Yeast protein interaction network: yeast = pd.read_csv(argv[1], sep='\s') print ' The head of dataframe before filtering:\n' print yeast.head() print ' The head of dataframe after filtering:\n'
dataset = 'youtube'
names = ['feature', 'label', 'graph', 'idx_train', 'idx_eval', 'idx_test']
objects = []
for i in range(len(names)):
    f = open("./data/{}/{}.bin".format(dataset, names[i]), 'rb')
    if sys.version_info > (3, 0):  # if python==3.x
        objects.append(pkl.load(f, encoding='latin1'))
    else:  # if python==2.x
        objects.append(pkl.load(f))
feature, label, graph, idx_train, idx_eval, idx_test = objects

print("Below shows the type of the stored objects:")
print("-- feature: type={}, shape={}".format(type(feature), feature.shape))
print("-- label: type={}, shape={}, entry_type={}".format(type(label), label.shape, type(label[0][0])))
print("-- graph: type={}, node num={}".format(type(graph), len(graph)))
print("-- idx_train: type={}, size={}".format(type(idx_train), len(idx_train)))
print("-- idx_eval: type={}, size={}".format(type(idx_eval), len(idx_eval)))
print("-- idx_test: type={}, size={}".format(type(idx_test), len(idx_test)))

G = nx.from_dict_of_lists(graph)
density = nx.density(G)
print('density: ', density)
clustering_coefficient = nx.average_clustering(G)
print('clustering coefficient: ', clustering_coefficient)
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)
# Note: despite the variable name, this averages eigenvector centrality, not betweenness.
betweenness_dict = nx.eigenvector_centrality(G)
avg_betweenness = np.mean(np.array([v for k, v in betweenness_dict.items()]))
print('avg eigenvector centrality: ', avg_betweenness)
def clustering(G):
    clustering_coeff = nx.average_clustering(G)
    print "clustering coeff : " + str(clustering_coeff)
with open(comPath + f, 'rb') as fp, open('../usedCsv/replyDict_' + f[12:14] + '.pickle', 'rb') as fReply:
    comObj = pickle.load(fp)
    replyDict = pickle.load(fReply)

community_size_dict = {}
community_link_dict = {}
loadCommunity()

g = nx.Graph()
community_node_size = []
for community in sorted(community_size_dict.keys()):
    community_node_size.append(len(community_size_dict[community]))
    g.add_node(community, size=len(community_size_dict[community]))
community_node_size_normalise = [
    float(i) / sum(community_node_size) for i in community_node_size
]
for node in sorted(community_link_dict.keys()):
    for edge in community_link_dict[node]:
        g.add_edge(node, edge, weight=community_link_dict[node][edge])
edgewidth = [d['weight'] for (u, v, d) in g.edges(data=True)]
nx.write_gexf(g, 'community_graph_' + f[12:14] + '.gexf')
print('Nodes:', len(g.nodes()), 'Links:', len(g.edges()),
      'Average cluster:', nx.average_clustering(g))
def write_lex_stats(b, num, f, f2, d_pos, lang, Graph=False):
    """Use the Levenshtein package to calculate lev and count up mps, neighbors, etc."""
    total = 0.
    total_diff = nltk.defaultdict(int)
    total_same = nltk.defaultdict(int)
    mps = 0
    neighbors = 0
    homophones = 0
    lev_total = 0.0
    lev_total_diff = 0.0
    lev_total_same = 0.0
    diff = 0
    init = 0
    last = 0
    specific_mps = defaultdict(int)
    specific_mps_init = defaultdict(int)
    ndict = nltk.defaultdict(int)
    mdict = nltk.defaultdict(int)
    hdict = nltk.defaultdict(int)
    uniq = nltk.defaultdict(int)
    avg_lev = nltk.defaultdict(list)
    tot = len(b) * 1.0
    g = nx.Graph()
    g.l = {}
    lengths_all = nltk.defaultdict(int)
    for item in b:
        g.add_node(item)
        length = len(item)
        lengths_all[len(item)] += 1
    for item in itertools.combinations(b, 2):
        lev = Levenshtein.distance(item[0], item[1])
        if len(item[0]) == len(item[1]):
            avg_lev[item[0]].append(lev)
            avg_lev[item[1]].append(lev)
        if lev == 0:
            homophones += 1
            hdict[item[0]] += 1
        elif lev == 1:
            g.add_edge(item[0], item[1])
            neighbors += 1
            ndict[item[0]] += 1
            ndict[item[1]] += 1
            if (d_pos[len(item[0])][item[0]] != d_pos[len(item[1])][item[1]]):
                diff += 1
            if len(item[0]) == len(item[1]):  # if it's a minimal pair
                l = len(item[0])
                pair_ph = find_minimal_pair_diff(item[0], item[1])
                specific_mps["_".join(sorted(pair_ph))] += 1
                pos1 = item[0].index(pair_ph[0])
                pos2 = item[1].index(pair_ph[1])
                if (d_pos[len(item[0])][item[0]] != d_pos[len(item[1])][item[1]]):
                    lev_total_diff += lev
                    total_diff[len(item[0])] += 1
                else:
                    lev_total_same += lev
                    total_same[len(item[0])] += 1
                if pos1 == pos2 and pos1 == 0:
                    specific_mps_init["_".join(sorted(pair_ph))] += 1
                    init += 1
                if pos1 == pos2 and pos1 == len(item[0]) - 1:
                    last += 1
                mps += 1
                mdict[item[0]] += 1  # *log(dict_b[item[1]])
                mdict[item[1]] += 1  # *log(dict_b[item[0]])
                uniq[item[0]] = 1
        total += 1
        lev_total += lev
    poss_same = nltk.defaultdict(int)
    poss_diff = nltk.defaultdict(int)
    for l in d_pos.keys():
        count = nltk.defaultdict(int)
        for cat in ['A', 'ADV', 'C', 'ART', 'N', 'PRON', 'NUM', 'EXP', 'V', 'PREP',
                    'NOM', 'VER', 'PRO', 'PRE', 'AUX', 'ADJ', 'CON']:
            count[cat] = len([i for i in d_pos[l].keys() if d_pos[l][i] == cat])
            poss_same[l] += (count[cat] * (count[cat] - 1)) / 2
        for p in itertools.combinations(count.keys(), 2):
            poss_diff[l] += count[p[0]] * count[p[1]]
        if poss_diff[l] == 0:
            poss_diff[l] = 1
        if poss_same[l] == 0:
            poss_same[l] = 1
    # total_diff[l] = 1.0*total_diff[l]/ poss_diff[l]
    # total_same[l] = 1.0*total_same[l]/ poss_same[l]
    # print l, total_diff[l], total_same[l]
    total_d = 1.0 * sum(total_diff.values()) / sum(poss_diff.values())
    total_s = 1.0 * sum(total_same.values()) / sum(poss_same.values())
    Gcc = nx.connected_component_subgraphs(g)
    # print num, len(Gcc[0]), len(Gcc[1])
    # print "neighbors", neighbors
    # print "average clustering", average_clustering(g)
    if graph == True:
        plt.figure(figsize=(50, 50))
        pos = nx.spring_layout(g)
        nx.draw_networkx(g, pos, with_labels=False, node_size=40, edge_color='0.8', node_color='k')
        plt.savefig('graph/' + str(num))
    conf = specific_mps["b_p"] + specific_mps["d_t"] + specific_mps["g_k"] + \
        specific_mps["f_v"] + specific_mps["s_z"] + specific_mps["S_Z"]
    dist = specific_mps["t_Z"] + specific_mps["d_S"] + specific_mps["g_f"] + \
        specific_mps["p_z"] + specific_mps["k_v"] + specific_mps["b_s"]
    conf_init = specific_mps["b_p"] + specific_mps["d_t"] + specific_mps["g_k"] + \
        specific_mps["f_v"] + specific_mps["s_z"] + specific_mps["S_Z"]
    dist_init = specific_mps["t_Z"] + specific_mps["d_S"] + specific_mps["g_f"] + \
        specific_mps["p_z"] + specific_mps["k_v"] + specific_mps["b_s"]
    f.write(",".join([
        str(x) for x in [
            num, len(hdict), len(b) - (len(uniq) - len(hdict)) - 1, mps, neighbors,
            lev_total / total, len(b), nx.average_clustering(g), nx.transitivity(g),
            len(nx.connected_component_subgraphs(g)[0]) / tot,
            specific_mps["b_p"], specific_mps["d_t"], specific_mps["g_k"],
            total_d, total_s, conf, dist, conf_init, dist_init,
            diff / neighbors, init, last
        ]
    ]) + "\n")
    for item in b:
        if len(item) < 15:
            f2.write(",".join([
                str(num), str(item), str(hdict[item]),
                str(mdict[item] / (hdict[item] + 1.)),
                str(ndict[item] / (hdict[item] + 1.)),
                str(1.0 * sum(avg_lev[item]) / len(avg_lev[item])),
                str(len(item))
            ]) + "\n")
    return
import copy
import random
import networkx as nx


def weakenClu(G0, nswap=1, max_tries=100, connected=1):
    # Randomly swap edges while keeping the degree distribution unchanged,
    # trying to weaken the clustering of the graph.
    if not nx.is_connected(G0):
        raise nx.NetworkXError("The graph is not connected; a connected graph is required")
    if G0.is_directed():
        raise nx.NetworkXError("Only undirected graphs are supported")
    if nswap > max_tries:
        raise nx.NetworkXError("The number of swaps exceeds the maximum number of attempts")
    if len(G0) < 4:
        raise nx.NetworkXError("Too few nodes; the graph must contain at least four nodes")
    tn = 0          # number of attempts
    swapcount = 0   # number of successful swaps
    G = copy.deepcopy(G0)
    # keys, degrees = zip(*G.degree().items())   # networkx 1.x form
    keys, degrees = zip(*dict(G.degree()).items())
    cdf = nx.utils.cumulative_distribution(degrees)   # cumulative distribution of degrees
    path = nx.average_shortest_path_length(G)
    print(path)
    while swapcount < nswap:
        if tn >= max_tries:
            e = ('Number of attempts (%s) exceeded the allowed maximum; ' % tn +
                 'successful swaps: %s' % swapcount)
            print(e)
            break
        tn += 1
        oldG = copy.deepcopy(G)
        avcOldG = nx.average_clustering(oldG)
        # Keeping the degree distribution unchanged, randomly pick two edges u-v and x-y
        (ui, xi) = nx.utils.discrete_sequence(2, cdistribution=cdf)   # returns a sample sequence of length 2
        if ui == xi:
            continue
        u = keys[ui]
        x = keys[xi]
        v = random.choice(list(G[u]))
        y = random.choice(list(G[x]))
        if len(set([u, v, x, y])) == 4:
            if (y not in G[u]) and (v not in G[x]):
                G.add_edge(u, y)
                G.add_edge(v, x)
                G.remove_edge(u, v)
                G.remove_edge(x, y)
                avcNewG = nx.average_clustering(G)
                # if the swap increased the average clustering, undo it
                if avcOldG < avcNewG:
                    G.add_edge(u, v)
                    G.add_edge(x, y)
                    G.remove_edge(u, y)
                    G.remove_edge(x, v)
                    continue
                # if requested, keep the graph connected
                if connected == 1:
                    if not nx.is_connected(G):
                        G.add_edge(u, v)
                        G.add_edge(x, y)
                        G.remove_edge(u, y)
                        G.remove_edge(x, v)
                        continue
                # keep the average shortest path length close to the original one
                new_path = nx.average_shortest_path_length(G)
                ret = abs(path - new_path)
                if ret > 0.1:
                    G.add_edge(u, v)
                    G.add_edge(x, y)
                    G.remove_edge(u, y)
                    G.remove_edge(x, v)
                    continue
                swapcount = swapcount + 1
    return G
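# A minimal usage sketch for weakenClu above (not part of the original source): it
# assumes the function is importable and uses an illustrative connected Watts-Strogatz
# graph, whose high clustering leaves room for the swaps to lower it.
import networkx as nx

G0 = nx.connected_watts_strogatz_graph(100, 6, 0.05, seed=42)
G1 = weakenClu(G0, nswap=50, max_tries=2000, connected=1)
print('clustering before:', nx.average_clustering(G0))
print('clustering after: ', nx.average_clustering(G1))
print('path length drift:', abs(nx.average_shortest_path_length(G0) -
                                nx.average_shortest_path_length(G1)))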
verbs = ['бежать_VERB', 'идти_VERB', 'ускоряться_VERB', '_VERB', 'перемещаться_VERB',
         'двигаться_VERB', 'шагать_VERB', 'нестись_VERB', 'лететь_VERB', 'скакать_VERB',
         'ехать_VERB']
G = nx.Graph()
G.add_nodes_from(verbs)
# `model` is a word-embedding model loaded earlier (e.g. gensim KeyedVectors)
in_model = [word for word in verbs if word in model]
for word in verbs:
    if word not in model:
        print('There is no such word in model')
# connect every pair of verbs whose embedding similarity lies in (0.5, 0.9999)
for word1, word2 in itertools.combinations(in_model, 2):
    Cos = model.similarity(word1, word2)
    if 0.5 < Cos < 0.9999:
        G.add_edge(word1, word2)
print('Nodes', G.nodes())
print('Edges', G.edges())
pos = nx.spring_layout(G)
nx.draw_networkx_nodes(G, pos, node_color='black', node_size=25)
nx.draw_networkx_edges(G, pos, edge_color='red')
nx.draw_networkx_labels(G, pos, font_size=10, font_family='Arial')
plt.axis('off')
plt.show()
central_words = []
deg = nx.degree_centrality(G)
for nodeid in sorted(deg, key=deg.get, reverse=True):
    central_words.append(nodeid)
print('Central words in the graph:', ", ".join(central_words[:3]))
print('Graph radius:', nx.radius(G))
print('Clustering coefficient:', nx.average_clustering(G))
NUM_NODES = 4158 if __name__ == "__main__": G = nx.Graph() with open("gr_qc_coauthorships.txt", "r") as f: for line in f: lst = line.strip('\n').split(' ') G.add_node(lst[0]) G.add_node(lst[1]) G.add_edge(lst[0], lst[1]) degrees = [] for node in G.nodes(): degrees.append(G.degree(node)) degrees = sorted(degrees) avg_cluster = nx.average_clustering(G) overall_cluster = nx.transitivity(G) * 3 '''max_diam = nx.diameter(G) avg_diam = nx.average_shortest_path_length(G) print("Average clustering coefficient is: " + str(avg_cluster)) print("Overall clustering coefficient is: " + str(overall_cluster)) print("Maximal diameter is: " + str(max_diam)) print("Average diameter is: " + str(avg_diam)) plt.hist(degrees, bins = 'auto') plt.xlabel("Degree of node") plt.ylabel("Number of nodes")''' plt.figure() values, base = np.histogram(degrees, bins=40) ccdf = np.ones(len(values)) - np.cumsum(values / sum(values)) plt.plot(ccdf) plt.title("CCDF of Degree Nodes")
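# A hedged plotting sketch (not from the original script): plt.plot(ccdf) above puts the
# bin index on the x-axis; reusing `base`, `ccdf`, and `plt` from the snippet and plotting
# against the histogram bin edges on log-log axes makes the heavy tail of the
# co-authorship degree distribution easier to read.
plt.figure()
plt.loglog(base[:-1], ccdf, marker='.')
plt.xlabel("Degree")
plt.ylabel("P(K > k)")
plt.title("CCDF of node degrees")
plt.show()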
randomNetwork=nx.to_networkx_graph(randomArray>1-randomProb) nx.draw_networkx(randomNetwork) #Degree distribution degree_sequence = [d for n, d in randomNetwork.degree()] plt.hist(degree_sequence,bins='auto',density=1) #Calculate number of edges (L) for random networks randomNum=1000 Nedges=np.zeros(randomNum) ClusteringCoeff=np.zeros(randomNum) for i in range(randomNum): randomArray=np.tril(np.random.random_sample([nodeSize,nodeSize])) np.fill_diagonal(randomArray,0) randomNetwork=nx.to_networkx_graph(randomArray>1-randomProb) Nedges[i]=randomNetwork.number_of_edges() ClusteringCoeff[i]=nx.average_clustering(randomNetwork) plt.hist(Nedges,bins='auto',density=1) plt.hist(ClusteringCoeff,bins='auto',density=1) #Evolution of random networks nodeSize=100 randomProbRange=np.arange(0.001,0.031,0.001) randomProbLccSize=np.zeros(len(randomProbRange)) for i in range(len(randomProbRange)): randomProb=randomProbRange[i] randomArray=np.tril(np.random.random_sample([nodeSize,nodeSize])) np.fill_diagonal(randomArray,0) randomNetwork=nx.to_networkx_graph(randomArray>1-randomProb) lcc_node = max(nx.connected_components(randomNetwork),key=len) randomNetworkLcc=randomNetwork.subgraph(lcc_node) randomProbLccSize[i]=randomNetworkLcc.number_of_nodes()
def average_clustering_coefficient(G):
    # collapse any parallel edges into a simple Graph before computing the coefficient
    if G.is_multigraph():
        return nx.average_clustering(nx.Graph(G))
    else:
        return nx.average_clustering(G)
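# Hedged usage sketch for average_clustering_coefficient above; the toy multigraph is
# illustrative only. Parallel edges are collapsed into a simple Graph before the
# coefficient is computed, so both calls below report the same value.
import networkx as nx

mg = nx.MultiGraph()
mg.add_edges_from([(1, 2), (1, 2), (2, 3), (1, 3), (3, 4)])  # note the parallel 1-2 edge
print(average_clustering_coefficient(mg))
print(average_clustering_coefficient(nx.Graph(mg)))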
def task_2_6(file, density, freq, f) : # creating labels list table = pd.read_csv("data/channel_locations.txt", delimiter='\s+', ) labels = list(table.label) for i in range(len(labels)) : # clean labels name labels[i] = labels[i].replace('..', '') labels[i] = labels[i].replace('.', '') labels_dic = dict(zip([x for x in range(len(labels))], labels)) pdc = PDC(file, freq) # adjacency matrix a_matrix = pdc.adj_matrix(density) # direct4ed graph G = nx.from_numpy_matrix(a_matrix, create_using=nx.DiGraph) ### GLOBAL INDECES # avarage clustering coefficient avg_clustering_coeff = nx.average_clustering(G) # avarage shortest path length avg_shortest_path_length = nx.average_shortest_path_length(G) ### LOCAL INDECES # degree dictionaries # for each node (0-63) return: # the number of edges adjacent to the node degree = dict(nx.degree(G)) nodes_list = list(degree.keys()) degree_df = pd.DataFrame.from_dict(degree, orient='index', columns=["Degree"]) # the number of edges pointing to the node in_degree = dict(G.in_degree()) in_degree_df = pd.DataFrame.from_dict(in_degree, orient='index', columns=["In-Degree"]) # the number of edges pointing out of the node out_degree = dict(G.out_degree()) out_degree_df = pd.DataFrame.from_dict(out_degree, orient='index', columns=["Out-Degree"]) ### LIST THE FIRST 10 CHANNELS for local indeces top_10_degree = [] top_10_in = [] top_10_out = [] for i in range(10) : # add first i channel to list top_10_degree.append(labels[max(degree, key=degree.get)]) # put to zero the value degree[max(degree, key=degree.get)] = 0 top_10_in.append(labels[max(in_degree, key=in_degree.get)]) in_degree[max(in_degree, key=in_degree.get)] = 0 top_10_out.append(labels[max(out_degree, key=out_degree.get)]) out_degree[max(out_degree, key=out_degree.get)] = 0 indeces = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] top_10_degree_df = pd.DataFrame(top_10_degree, index=indeces, columns=['Degree']) top_10_in_df = pd.DataFrame(top_10_in, index=indeces, columns=['In-Degree']) top_10_out_df = pd.DataFrame(top_10_out, index=indeces, columns=['Out-Degree']) # save top 10 to file csv top_10_df = pd.concat([top_10_degree_df, top_10_in_df, top_10_out_df], axis=1) try : os.remove('results/task_2_6_ %s' % f + '_top_10.csv') top_10_df.to_csv('results/task_2_6_ %s' % f + '_top_10.csv') except : top_10_df.to_csv('results/task_2_6_ %s' % f + '_top_10.csv') # save all degree on csv all_degree_df = pd.concat([degree_df, in_degree_df, out_degree_df], axis=1) all_degree_df = all_degree_df.rename(index=labels_dic) try : os.remove('results/task_2_6_%s' % f + '_all_degree.csv') all_degree_df.to_csv('results/task_2_6_%s' % f + '_all_degree.csv') except : all_degree_df.to_csv('results/task_2_6_%s' % f + '_all_degree.csv') # save global indeces to file txt try : os.remove('results/task_2_6_%s' % f + '_global.txt') text_file = open('results/task_2_6_%s' % f + '_global.txt', "w") text_file.write("Avarage Clustering Coefficient: %f \n \n" % avg_clustering_coeff) text_file.write("Avarage Shortest Path: %f" % avg_shortest_path_length) text_file.close() except : text_file = open('results/task_2_6_%s' % f + '_global.txt', "w") text_file.write("Avarage Clustering Coefficient: %f \n \n" % avg_clustering_coeff) text_file.write("Avarage Shortest Path: %f" % avg_shortest_path_length) text_file.close() print("Done!")
def inter_random_25k(G0, node_community, n_swap=1, max_tries=100, connected=1):
    """Return a 2.5K null model based on a random rewiring algorithm
    restricted to inter-community edges.

    Parameters
    ----------
    G0 : undirected and unweighted graph
    node_community : list
        nodes and the communities they belong to
    n_swap : int (default = 1)
        Number of double-edge swaps to perform
    max_tries : int (default = 100)
        Maximum number of attempts to swap edges
    connected : int
        keep the connectivity of the graph or not.
        1 : keep, 0 : not keep

    Notes
    -----
    Keep the 2.5K characteristics unchanged and the graph connected.
    Swap edges between communities only.
    """
    judge_error(G0, n_swap, max_tries, connected)
    # Number of attempts to swap
    n_try = 0
    # Number of effective swaps
    swapcount = 0
    G = copy.deepcopy(G0)
    keys, degrees = zip(*G.degree().items())
    cdf = nx.utils.cumulative_distribution(degrees)
    while swapcount < n_swap:
        if n_try >= max_tries:
            print('Maximum number of swap attempts (%s) exceeded ' % n_try +
                  'before desired swaps achieved (%s)' % swapcount)
            break
        n_try += 1
        # Keep the degree distribution unchanged, choose two edges (u-v, x-y) randomly
        (ui, xi) = nx.utils.discrete_sequence(2, cdistribution=cdf)
        if ui == xi:
            continue
        u = keys[ui]
        x = keys[xi]
        v = random.choice(list(G[u]))
        y = random.choice(list(G[x]))
        # Make sure the four nodes are not repeated.
        if len(set([u, v, x, y])) == 4:
            # Make sure the chosen edges connect different communities (inter-community edges).
            if edge_in_community(node_community, (u, v)) == 0 and edge_in_community(node_community, (x, y)) == 0:
                # Make sure the newly created edges are also inter-community.
                if edge_in_community(node_community, (u, y)) == 0 and edge_in_community(node_community, (v, x)) == 0:
                    # Keep the degree matching characteristic of nodes unchanged.
                    if G.degree(v) == G.degree(y):
                        # Make sure the new edges do not already exist in the graph.
                        if (y not in G[u]) and (v not in G[x]):
                            G.add_edge(u, y)
                            G.add_edge(v, x)
                            G.remove_edge(u, v)
                            G.remove_edge(x, y)
                            degree_node_list = map(
                                lambda t: (t[1], t[0]),
                                G0.degree([u, v, x, y] + list(G[u]) + list(G[v]) +
                                          list(G[x]) + list(G[y])).items())
                            dict_degree = count_degree_nodes(degree_node_list)
                            for i in range(len(dict_degree)):
                                avcG0 = nx.average_clustering(
                                    G0, nodes=list(dict_degree.values())[i],
                                    weight=None, count_zeros=True)
                                avcG = nx.average_clustering(
                                    G, nodes=list(dict_degree.values())[i],
                                    weight=None, count_zeros=True)
                                i += 1
                                # If the degree-related clustering coefficient changed
                                # after the swap, withdraw the swap.
                                if avcG0 != avcG:
                                    G.add_edge(u, v)
                                    G.add_edge(x, y)
                                    G.remove_edge(u, y)
                                    G.remove_edge(x, v)
                                    break
                            # If connected = 1 but the resulting graph is no longer
                            # fully connected, withdraw the swap.
                            if connected == 1:
                                if not nx.is_connected(G):
                                    G.add_edge(u, v)
                                    G.add_edge(x, y)
                                    G.remove_edge(u, y)
                                    G.remove_edge(x, v)
                                    continue
                            swapcount += 1
    return G
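# A hypothetical call sketch, not from the original source: it assumes the helper
# functions referenced above (judge_error, edge_in_community, count_degree_nodes) are
# available, that node_community holds the communities as node sets, and that the
# networkx 1.x degree() API used by inter_random_25k is in place.
import networkx as nx

G0 = nx.connected_caveman_graph(4, 6)   # four dense communities of six nodes
node_community = [set(range(i * 6, (i + 1) * 6)) for i in range(4)]
G_null = inter_random_25k(G0, node_community, n_swap=20, max_tries=2000, connected=1)
# the degree sequence (and hence the 2K statistics) should be unchanged by the rewiring
print(sorted(G0.degree().values()) == sorted(G_null.degree().values()))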
import networkx as nx import matplotlib.pyplot as plt NUM = 282 p = 0.055 c = 14 G = nx.random_graphs.watts_strogatz_graph(NUM, c, p) nx.draw(G, pos=nx.circular_layout(G)) plt.show() degree = nx.degree_histogram(G) x = range(len(degree)) y = [z / float(sum(degree)) for z in degree] s = 0 for i in range(len(y)): s += (y[i] * (i + 1)) print("Average Degree:", s) print("Average Clustering:", nx.average_clustering(G)) try: print("Average Path Length:", nx.average_shortest_path_length(G)) except nx.exception.NetworkXError: print("Graph is not connected.") plt.figure() plt.scatter(x, y, marker='.') # log plt.figure() plt.loglog(x, y, linewidth=0, marker='.') plt.show()
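# A hedged follow-up sketch (not in the original script): it reuses NUM and G from above
# and compares the Watts-Strogatz graph against an Erdos-Renyi graph with the same number
# of nodes and edges; the usual small-world signature is a similar average path length
# but much higher clustering.
er = nx.gnm_random_graph(NUM, G.number_of_edges(), seed=1)
print("WS clustering:", nx.average_clustering(G), " ER clustering:", nx.average_clustering(er))
if nx.is_connected(G) and nx.is_connected(er):
    print("WS path length:", nx.average_shortest_path_length(G),
          " ER path length:", nx.average_shortest_path_length(er))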
print(maximum_clique_size)
maximum_cliques = [x for x in cliques if len(x) == maximum_clique_size]
print(maximum_cliques)
n_maximum_cliques = len(maximum_cliques)
print(n_maximum_cliques)
average_clique_size = sum(sizes_of_cliques) / n_cliques
print(average_clique_size)
maximum_clique_sets = [set(x) for x in maximum_cliques]
print(maximum_clique_sets)

# Clustering coefficient
print(nx.clustering(g))
print(nx.average_clustering(g))

# ego network of node "53"
ego_net = nx.ego_graph(g, "53")
print(len(ego_net))
print(nx.average_clustering(ego_net))
plt.figure(figsize=(8, 8))
pos = nx.random_layout(ego_net)
nx.draw_networkx(ego_net, pos, node_size=300, with_labels=True)
print(nx.clustering(ego_net))
def fc_metrics_subareas(fcfile_pickle, subareas=['M1', 'STN', 'GP'], subtitle='M1DBS'): """ cc: average Clustering Coefficient nbc: Node Betweenness centrality () """ with open(fcfile_pickle, 'rb') as handle: fc = pickle.load(handle) imcohs = fc['imcohs'] pvals = fc['pvals'] chnAreas = fc['chnAreas'] idxs_remain = [] chnAreas_new = [] for ci, carea in enumerate(chnAreas): for sarea in subareas: if sarea.lower() in carea.lower(): idxs_remain.append(ci) chnAreas_new.append(carea) idxs_remain = np.array(idxs_remain) tmp = imcohs[idxs_remain, :] tmp = tmp[:, idxs_remain] imcohs = tmp tmp = pvals[idxs_remain, :] tmp = tmp[:, idxs_remain] pvals = tmp chnAreas = chnAreas_new # multiple comparison correction, get weights reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep') [rows, cols] = np.where(reject == True) weight = np.zeros(imcohs.shape) if len(rows) > 0: weight[rows, cols] = imcohs[rows, cols] weight = abs(weight) G = nx.Graph() G.add_nodes_from(np.arange(0, weight.shape[0])) for i in range(0, weight.shape[0] - 1): for j in range(i + 1, weight.shape[0]): if weight[i, j] > 0: G.add_edge(i, j, weight=weight[i, j]) cc = nx.average_clustering(G) nbcs = nx.degree_centrality(G) folder, filename = os.path.split(fcfile_pickle)[0], os.path.split( fcfile_pickle)[1] metricfile = os.path.join(folder, 'metric_' + subtitle + '_' + filename) metrics = dict() metrics['cc'] = cc metrics['nbcs'] = nbcs metrics['chnAreas'] = fc['chnAreas'] with open(metricfile, 'wb') as f: pickle.dump(metrics, f)
def BA_model():
    powers = {"barabasi05": 0.5, "barabasi10": 1, "barabasi15": 1.5, "barabasi20": 2}
    graphs = {key: [] for key in powers}
    # generate 10 networks per exponent and treat each the same way
    for i in range(10):
        # generate a Barabasi network with p = power
        for key, power in powers.items():
            graphs[key].append(ig_to_nx(ig.Graph.Barabasi(500, 10, power=power)))

    print("Finding degree distributions...")
    dists = {key: degree_distribution(graphs[key][0]) for key in powers}

    # table
    print("Taking measures...")
    lens = {key: [] for key in powers}
    degrees = {key: [] for key in powers}
    clusterings = {key: [] for key in powers}
    assortativities = {key: [] for key in powers}
    shortest_paths = {key: [] for key in powers}
    entropies = {key: [] for key in powers}
    moments = {key: [] for key in powers}
    for key in powers:
        for graph in graphs[key]:
            lens[key].append(len(graph))
            degrees[key].append(average_degree(graph))
            clusterings[key].append(nx.average_clustering(graph))
            assortativities[key].append(nx.degree_assortativity_coefficient(graph))
            shortest_paths[key].append(nx.average_shortest_path_length(graph))
            entropies[key].append(entropy(graph))
            moments[key].append(stat_moment(graph, 2))

    for key, power in powers.items():
        print("Calculating Barabási-Albert measurements for alfa = %.1f..." % power)
        # median
        print("Median of Barabasi alfa = %.1f" % power)
        print("Number of nodes = %d" % np.median(lens[key]))
        print("Degrees = %.4f" % np.median(degrees[key]))
        print("Clustering coefficient = %.4f" % np.median(clusterings[key]))
        print("Assortativity = %.4f" % np.median(assortativities[key]))
        print("Shortest paths = %.4f" % np.median(shortest_paths[key]))
        print("Shannon entropies = %.4f" % np.median(entropies[key]))
        print("Second stat moments = %.4f" % np.median(moments[key]))
        # standard deviation
        print("Standard Deviation of Barabasi alfa = %.1f" % power)
        print("Number of nodes = %d" % np.std(lens[key], ddof=1))
        print("Degrees = %.4f" % np.std(degrees[key], ddof=1))
        print("Clustering coefficient = %.4f" % np.std(clusterings[key], ddof=1))
        print("Assortativity = %.4f" % np.std(assortativities[key], ddof=1))
        print("Shortest paths = %.4f" % np.std(shortest_paths[key], ddof=1))
        print("Shannon entropies = %.4f" % np.std(entropies[key], ddof=1))
        print("Second stat moments = %.4f" % np.std(moments[key], ddof=1))
def smallworldstats(graph): avgclustering = nx.average_clustering(graph) avgpathlength = nx.average_shortest_path_length(graph) return avgclustering, avgpathlength
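# Minimal usage sketch for smallworldstats above (not from the original source); it
# assumes a connected graph, since average_shortest_path_length fails otherwise.
import networkx as nx

g = nx.connected_watts_strogatz_graph(200, 8, 0.1, seed=0)
cc, pl = smallworldstats(g)
print("average clustering:", cc, "average path length:", pl)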
def boxStats(boxNet): #fordavid other three calculated here? ## matrices boxNodes = len(boxNet) boxMat = nx.to_numpy_matrix(boxNet) boxSparse = csgraph_from_dense(boxMat) boxMatPath = shortest_path(boxSparse, method='auto', directed=False, return_predecessors=False, unweighted=True, overwrite=False) boxPathList = [] pairsNumBox = len(list(combinations(range(boxNodes), 2))) for i in range(boxNodes-1): for j in range(i+1, boxNodes): tempDist = boxMatPath[i][j] if tempDist > 0 and np.isfinite(tempDist): boxPathList.append(tempDist) ##boxNet characteristics degreeRaw = list(boxNet.degree()) degreeBox = [] for i in degreeRaw: degreeBox.append(i) degreeNormBox = np.divide(degreeBox, np.sum(degreeBox), dtype = float) diameterPathBox = np.max(boxPathList) avgPathDistBox = np.mean(boxPathList) nEdgesBox = np.divide(np.sum(degreeBox), 2, dtype = float) edgePBox = nx.density(boxNet) globalEfficiencyBox = np.divide(sum(np.divide(1, boxPathList, dtype = float)),pairsNumBox , dtype = float) radiusBox = nx.radius(boxNet) kCoreBox = max(list(nx.core_number(boxNet).values())) degreeAssortBox = nx.degree_assortativity_coefficient(boxNet) avgDegreeBox = np.mean(degreeBox) maxDegreeBox = max(degreeBox) eValsBox = np.linalg.eigvals(boxMat) spectralRadiusAdjBox = max(abs(eValsBox)) eigenCentDictBox = nx.eigenvector_centrality_numpy(boxNet, weight=None) eigenCentRawBox = list(eigenCentDictBox.values()) eigenCentBox = np.divide(eigenCentRawBox, sum(eigenCentRawBox), dtype = float) colorsBox = nx.coloring.greedy_color(boxNet, strategy=nx.coloring.strategy_connected_sequential_bfs) colorNumBox = len(list(set(list(colorsBox.values())))) avgClustCoeffBox = nx.average_clustering(boxNet) scaledSpectralRadiusBox = np.divide(spectralRadiusAdjBox, avgDegreeBox, dtype = float) if motifChoice == 1: freqMBox = motifCalc4(boxNet) else: freqMBox = [0.166666667, 0.166666667, 0.166666667, 0.166666667, 0.166666667, 0.166666667] # network entropy lapMatBox= np.asarray(nx.to_numpy_matrix(nx.from_scipy_sparse_matrix(nx.laplacian_matrix(boxNet)))) eValsLapBox = np.linalg.eigvals(lapMatBox) eValsLapBoxSorted = sorted(np.real(eValsLapBox)) spectralGapBox = eValsLapBoxSorted[1] degreeSumBox = np.sum(degreeBox) lapMatBoxNorm = np.divide(lapMatBox, degreeSumBox, dtype = float) eValsLapBoxNorm = np.linalg.eigvals(lapMatBoxNorm) eValsLapNonZeroBoxNorm = [] for i in eValsLapBoxNorm: j = abs(i) if j > 0: eValsLapNonZeroBoxNorm.append(j) vonEntropyBox = np.divide(entropyCalc(eValsLapNonZeroBoxNorm), math.log(boxNodes,2), dtype = float) degreeEntropyBox = np.divide(entropyCalc(degreeNormBox), math.log(boxNodes,2), dtype = float) KSEntropyBox = np.divide(math.log(spectralRadiusAdjBox, 2), math.log(boxNodes-1,2), dtype = float) motifEntropyBox = np.divide(entropyCalc(freqMBox), math.log(len(freqMBox),2), dtype = float) popEntropyBox = np.divide(entropyCalc(eigenCentBox), math.log(boxNodes,2), dtype = float) graphEntropyBox = np.divide(graphEntropyCalc(colorsBox), math.log(boxNodes,2), dtype = float) return edgePBox, radiusBox, kCoreBox, degreeAssortBox, diameterPathBox, avgPathDistBox, nEdgesBox, globalEfficiencyBox, avgDegreeBox, maxDegreeBox, spectralRadiusAdjBox, spectralGapBox, scaledSpectralRadiusBox, colorNumBox, avgClustCoeffBox, freqMBox, motifEntropyBox, vonEntropyBox, graphEntropyBox, popEntropyBox, KSEntropyBox, degreeEntropyBox
#BG 1359 ZYL 3074 DAC 3076 0 313 772 1
#4H 8463 ZYL 3074 DAC 3076 0 313 1
# all of ZYL's destinations were the DAC airport, i.e. its out-degree
conn1[conn1['main Airport'] == 'ZYL'].count()  # 4

## Betweenness centrality
b = nx.betweenness_centrality(g)
print(max(b, key=b.get))  # ZYL has the maximum betweenness centrality

## Eigenvector centrality
evg = nx.eigenvector_centrality(g)
print(max(evg, key=evg.get))  # ZYL has the maximum influence.

# Clustering coefficient
cluster_coeff = nx.clustering(g)
cluster_zyl = nx.clustering(g, 'ZYL')  # 0.8333333333333334
# a clustering coefficient close to 1 means the node's neighbourhood is nearly a clique
print(cluster_coeff)
# the clustering coefficient measures the degree to which the neighbours of a node
# cluster together; two connected nodes are likely to be part of the same tightly knit group

# Average clustering
cc = nx.average_clustering(g)
print(cc)  # 0.4870933566129556
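# A hedged sketch of ranking airports by centrality value rather than by key; it reuses
# the betweenness dictionary b and the route graph g from the script above, and the
# cutoff of five airports is arbitrary.
top5 = sorted(b, key=b.get, reverse=True)[:5]
print(top5)
print([round(b[airport], 4) for airport in top5])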
df = pd.concat(li, axis=0, ignore_index=True) df = df[['Source', 'Target']] df.drop_duplicates(subset=['Source', 'Target'], inplace=True) #print(df.head(20)) # create the ASOIAF networkx object G = nx.from_pandas_edgelist(df, source='Source', target='Target') # create a random graph networkx object #G = nx.erdos_renyi_graph(len(G.nodes()), 2*len(G.edges())/(len(G.nodes())*(len(G.nodes())-1))) #G = nx.watts_strogatz_graph(len(G.nodes()), 7, 0.3) print("\n\nNumber of nodes: %d" % len(G.nodes())) print("\n\nNumber of edges: %d" % len(G.edges())) graphs = list(nx.connected_component_subgraphs(G)) print("\n\nConnected components: %d (size: %d)" % (len(graphs), len(graphs[0].nodes()))) print("\n\nAverage shortest path: %f" % nx.average_shortest_path_length(graphs[0])) print("\n\nDiameter: %d" % nx.diameter(graphs[0])) print("\n\nAverage clustering coefficient: %f" % nx.average_clustering(G)) degree_distribution.plot_degree_distribution(graphs[0])
def group_evaluate_trace(xnr_user_no, nodes, all_influence, all_sensitive, date_time, G=None):
    result = {}
    result['xnr_user_no'] = xnr_user_no
    result['nodes'] = nodes
    result['num'] = len(nodes)
    # fetch the community retweet network from redis
    count = 0
    scan_cursor = 1
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    # get redis db number
    db_number = get_db_num(now_date_ts)
    print 'db_number:', str(db_number)
    # get redis db
    print 'retweet_dict::', retweet_redis_dict
    retweet_redis = retweet_redis_dict[str(db_number)]
    comment_redis = comment_redis_dict[str(db_number)]
    retweet_result = []
    for uid in nodes:
        item_1 = str('retweet_' + uid)
        # print 'item_lookup::',item_1,type(item_1)
        re_result = retweet_redis.hgetall(item_1)
        if re_result:
            save_dict = dict()
            save_dict['uid'] = uid
            save_dict['uid_retweet'] = re_result
            retweet_result.append(save_dict)
    # print 'test_result::',retweet_result
    # print 'aaa:::', retweet_redis.hgetall('retweet_'+str(nodes[-1]))
    # print 'retweet_redis::',retweet_redis
    # print 'comment_redis::',comment_redis
    '''
    re_scan = retweet_redis.scan(scan_cursor,count=10)
    for item in re_scan[1]:
        # item_list = item.split('_')
        print 'item::',item,type(item)
        item_result = retweet_redis.hgetall(item)
        print 'item_result::',item_result
    # print 'hlen::',retweet_redis.hlen()
    # print 'hgetall::',retweet_redis.hgetall()
    retweet_result = retweet_redis.hgetall(nodes)
    comment_result = comment_redis.hgetall(nodes)
    '''
    # print 'retweet_result:::',retweet_result
    # print 'comment_result:::',comment_result
    G_i = nx.Graph()
    for i in retweet_result:
        # print 'i:',i
        # if not i['found']:
        #     continue
        uid_retweet = i['uid_retweet']
        max_count = max([int(n) for n in uid_retweet.values()])
        G_i.add_weighted_edges_from([(i['uid'], j, float(uid_retweet[j]) / max_count)
                                     for j in uid_retweet.keys()
                                     if j != i['uid'] and j and i['uid']])
    '''
    for i in comment_result:
        # print 'comment_i:',i
        if not i['found']:
            continue
        uid_comment = json.loads(i['_source']['uid_comment'])
        max_count = max([int(n) for n in uid_comment.values()])
        G_i.add_weighted_edges_from([(i['_source']['uid'],j,float(uid_comment[j])/max_count) for j in uid_comment.keys() if j != i['_source']['uid'] and j and i['_source']['uid']])
    '''
    sub_g = G_i.subgraph(nodes)
    result['density'] = round(nx.density(sub_g), 4)
    # print 'ave_cluster::',nx.average_clustering(sub_g)
    try:
        result['cluster'] = round(nx.average_clustering(sub_g), 4)
    except:
        result['cluster'] = 0
    result['transitivity'] = round(nx.transitivity(sub_g), 4)
    ## replace the results with the values computed for the current day
    influence_field = 'user_index'
    sensitive_field = 'sensitive'
    influence_result = get_influence_value(date_time, influence_field, nodes)
    sensitive_result = get_sensitive_value(date_time, sensitive_field, nodes)
    result['max_influence'] = round((max(influence_result) / float(all_influence)) * 100, 4)
    result['mean_influence'] = round(((sum(influence_result) / len(influence_result)) /
                                      float(all_influence)) * 100, 4)
    max_sensitive = round((max(sensitive_result) / float(all_sensitive)) * 1, 4)
    if max_sensitive > 100:
        result['max_sensitive'] = 100.0000
    else:
        result['max_sensitive'] = max_sensitive
    result['mean_sensitive'] = round(((sum(sensitive_result) / len(sensitive_result)) /
                                      float(all_sensitive)) * 1, 4)
    return result
"Branch Avenue", "Suitland", "Naylor Road", "Southern Avenue", "Congress Heights", "Anacostia", "Navy Yard–Ballpark", "Waterfront", "L'Enfant Plaza", "Archives", "Gallery Place", "Mount Vernon Square", "Shaw – Howard University", "U Street", "Columbia Heights", "Georgia Avenue–Petworth", "Fort Totten", "West Hyattsville", "Prince George's Plaza", "College Park–University of Maryland", "Greenbelt" ] add_edges(metro, green_line) silver_line = [ "Wiehle–Reston East", "Spring Hill", "Greensboro", "Tysons Corner", "McLean", "East Falls Church", "Ballston–MU", "Virginia Square–GMU", "Clarendon", "Court House", "Rosslyn", "Foggy Bottom – GWU", "Farragut West", "McPherson Square", "Metro Center", "Federal Triangle", "Smithsonian", "L'Enfant Plaza", "Federal Center SW", "Capitol South", "Eastern Market", "Potomac Avenue", "Stadium–Armory", "Benning Road", "Capitol Heights", "Addison Road", "Morgan Boulevard", "Largo Town Center" ] add_edges(metro, silver_line) #nx.draw(metro_distance, with_labels = True, node_size = 100) nx.write_graphml(metro, os.getcwd() + "\\test.graphml") print("clossness centality") print(nx.closeness_centrality(metro)) print("degree centality") print(nx.degree_centrality(metro)) print("betweenness centrality") print(nx.betweenness_centrality(metro)) print("circuit rank") print(nx.number_of_edges(metro) - nx.number_of_nodes(metro) + 1) print("Average clustering") print(nx.average_clustering(metro))
n1 = set((amazonBooks[asin]['Categories']).split())
n2 = set((amazonBooks[a]['Categories']).split())
n1In2 = n1 & n2
n1Un2 = n1 | n2
if (len(n1Un2)) > 0:
    similarity = round(len(n1In2) / len(n1Un2), 2)
    copurchaseGraph.add_edge(asin, a.strip(), weight=similarity)

# get degree centrality and clustering coefficients
# of each ASIN and add them to the amazonBooks metadata
dc = networkx.degree(copurchaseGraph)
for asin in networkx.nodes(copurchaseGraph):
    metadata = amazonBooks[asin]
    metadata['DegreeCentrality'] = int(dc[asin])
    ego = networkx.ego_graph(copurchaseGraph, asin, radius=1)
    metadata['ClusteringCoeff'] = round(networkx.average_clustering(ego), 2)
    amazonBooks[asin] = metadata

# write amazonBooks data to file
# (all except copurchase data - because that data is now in the graph)
fhw = open('./amazon-books.txt', 'w', encoding='utf-8', errors='ignore')
fhw.write("Id\t" + "ASIN\t" + "Title\t" + "Categories\t" + "Group\t"
          #+ "Copurchased\t"
          + "SalesRank\t" + "TotalReviews\t" + "AvgRating\t"
          + "DegreeCentrality\t" + "ClusteringCoeff\n")
for asin, metadata in amazonBooks.items():
    fhw.write(metadata['Id'] + "\t" +
              asin + "\t" +
              metadata['Title'] + "\t" +
              metadata['Categories'] + "\t" +
              metadata['Group'] + "\t" +
karateclub = nx.read_gml("karate.gml") #analyzing dataset # calculating average degree N, K = karateclub.order(), karateclub.size() print("degree of node 1: ", karateclub.degree(1)) avg_deg = float(K) / N print("Nodes: ", N) print("Edges: ", K) print("Average degree: ", avg_deg) #clustering coefficients print("clustering coefficient node 1: ", nx.clustering(karateclub, 1)) print("average clustering coefficient", nx.average_clustering(karateclub)) #Betweenness centrality print("betweenness centrality of node 1: ", nx.betweenness_centrality(karateclub)[1]) def avg_betw_centr(graph): i = 1 a = 0 N = int(graph.order()) while i < N + 1: a = a + float(nx.betweenness_centrality(graph)[i]) i = i + 1 avg = float(a) / float(graph.order()) print("average betweenness centrality: ", avg)
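# A hedged alternative to avg_betw_centr above (not from the original source):
# nx.betweenness_centrality already returns the full dictionary, so computing it once
# and averaging its values gives the same result without recomputing the centralities
# on every loop iteration.
def avg_betw_centr_fast(graph):
    bc = nx.betweenness_centrality(graph)
    avg = sum(bc.values()) / graph.order()
    print("average betweenness centrality: ", avg)
    return avg

avg_betw_centr_fast(karateclub)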