def main(): tempo_dir = "../corpus-local/tempo-txt" file_regex = ".*\.txt" G = build_graph(tempo_dir, file_regex) """ ccs = nx.clustering(G) avg_clust = sum(ccs.values()) / len(ccs) """ print tempo_dir print "\tAda " + str(len(G.nodes())) + " node." print "\tAda " + str(len(G.edges())) + " edge." print "\tClustering coefficient : " + str(nx.average_clustering(G)) print "\tAverage shortest path length" for g in nx.connected_component_subgraphs(G): print "\t\t" + str(nx.average_shortest_path_length(g)) kompas_dir = "../corpus-local/kompas-txt" G = build_graph(kompas_dir, file_regex) print kompas_dir print "\tAda " + str(len(G.nodes())) + " node." print "\tAda " + str(len(G.edges())) + " edge." print "\tClustering coefficient : " + str(nx.average_clustering(G)) print "\tAverage shortest path length" for g in nx.connected_component_subgraphs(G): print "\t\t" + str(nx.average_shortest_path_length(g))
def testRun(self):
    """Run a Watts-Strogatz simulation and compare it with the reference graph.

    The simulated graph must have the same number of nodes and edges as
    ``self.comparison_graph``.
    """
    simulation = watts_strogatz.WS()
    run_parameters = dict(
        steps=self.starting_network_size,
        rewiring_probability=self.rewiring_probability,
        lattice_connections=self.lattice_connections,
        starting_network_size=self.starting_network_size,
    )
    simulation.run(**run_parameters)

    with simulation.graph.handle as simulated:
        expected_nodes = self.comparison_graph.number_of_nodes()
        self.assertEqual(expected_nodes, simulated.number_of_nodes())

        expected_edges = self.comparison_graph.number_of_edges()
        self.assertEqual(expected_edges, simulated.number_of_edges())

        # The path-based comparisons below are switched off and never run.
        if False:
            self.assertAlmostEqual(
                nx.diameter(self.comparison_graph),
                nx.diameter(simulated),
                delta=1.
            )
            self.assertAlmostEqual(
                nx.average_shortest_path_length(self.comparison_graph),
                nx.average_shortest_path_length(simulated),
                delta=1.
            )
def strongly_connected_components(): conn = sqlite3.connect("zhihu.db") #following_data = pd.read_sql('select user_url, followee_url from Following where followee_url in (select user_url from User where agree_num > 50000) and user_url in (select user_url from User where agree_num > 50000)', conn) following_data = pd.read_sql('select user_url, followee_url from Following where followee_url in (select user_url from User where agree_num > 10000) and user_url in (select user_url from User where agree_num > 10000)', conn) conn.close() G = nx.DiGraph() cnt = 0 for d in following_data.iterrows(): G.add_edge(d[1][0],d[1][1]) cnt += 1 print 'links number:', cnt scompgraphs = nx.strongly_connected_component_subgraphs(G) scomponents = sorted(nx.strongly_connected_components(G), key=len, reverse=True) print 'components nodes distribution:', [len(c) for c in scomponents] #plot graph of component, calculate saverage_shortest_path_length of components who has over 1 nodes index = 0 print 'average_shortest_path_length of components who has over 1 nodes:' for tempg in scompgraphs: index += 1 if len(tempg.nodes()) != 1: print nx.average_shortest_path_length(tempg) print 'diameter', nx.diameter(tempg) print 'radius', nx.radius(tempg) pylab.figure(index) nx.draw_networkx(tempg) pylab.show() # Components-as-nodes Graph cG = nx.condensation(G) pylab.figure('Components-as-nodes Graph') nx.draw_networkx(cG) pylab.show()
def _normalise_scores(values):
    """Return z-scores of *values* shifted so the smallest entry equals 0.1.

    When ``valid`` rejects *values*, a flat vector of 0.1 is used instead.
    """
    if valid(values):
        scaled = stats.zscore(values)
    else:
        scaled = np.array([.1] * len(values))
    return scaled - min(scaled) + .1


def algorithm(w1,w2,w3,w4,G1,G2,G3,G4):
    """Rank four players from the structure of their graphs.

    For each graph the weighted average clustering, weighted average
    shortest path length, node count and edge count are computed.  Each
    metric is normalised across the four graphs, combined with the weights
    ``w1``..``w4`` (in that metric order) and multiplied by 1000.

    Returns a list of five strings: the title ``"USAU RANKINGS"`` followed
    by ``"Player N: <score>"`` entries sorted by descending score.  If any
    step fails, a fixed "Unable to compute rankings!" list is returned.
    """
    try:
        graphs = (G1, G2, G3, G4)
        cc = np.array([nx.average_clustering(g, weight='weight') for g in graphs])
        spl = np.array([nx.average_shortest_path_length(g, weight='weight') for g in graphs])
        nds = np.array([nx.number_of_nodes(g) for g in graphs])
        edgs = np.array([nx.number_of_edges(g) for g in graphs])

        cc = _normalise_scores(cc)
        spl = _normalise_scores(spl)
        nds = _normalise_scores(nds)
        edgs = _normalise_scores(edgs)

        # A list of pairs (not a dict) keeps tie order deterministic: equal
        # scores stay in player order because sorted() is stable.
        scores = [
            ('Player %d:' % (i + 1),
             (w1*cc[i] + w2*spl[i] + w3*nds[i] + w4*edgs[i]) * 1000)
            for i in range(len(graphs))
        ]
        rank = sorted(scores, key=lambda x: x[1], reverse=True)
        return ["USAU RANKINGS"] + [
            str(label) + " " + str(int(score)) for label, score in rank
        ]
    except Exception:
        # Best-effort: any computation failure yields the placeholder list,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        return ["Unable to compute rankings! Need data","Player 1","Player 2","Player 3","Player 4"]
def compare_graphs(graph):
    """Print a side-by-side comparison of ``graph`` with three random models.

    An Erdos-Renyi, a Barabasi-Albert and a Watts-Strogatz graph are
    generated with the same number of nodes as ``graph``; then the edge
    count, average clustering, average shortest path length and diameter
    of all four graphs are printed.
    """
    n = nx.number_of_nodes(graph)
    m = nx.number_of_edges(graph)
    k = np.mean(list(nx.degree(graph).values()))

    # Generation order is kept: erdos, barabasi, small world.
    erdos = nx.erdos_renyi_graph(n, p=m/float(n*(n-1)/2))
    barabasi = nx.barabasi_albert_graph(n, m=int(k)-7)
    small_world = nx.watts_strogatz_graph(n, int(k), p=0.04)

    labelled_graphs = (
        ('My network: ', graph),
        ('Erdos: ', erdos),
        ('Barabasi: ', barabasi),
        ('SW: ', small_world),
    )
    sections = (
        ('Compare the number of edges', nx.number_of_edges),
        ('Compare average clustering coefficients', nx.average_clustering),
        ('Compare average path length', nx.average_shortest_path_length),
        ('Compare graph diameter', nx.diameter),
    )
    for heading, metric in sections:
        print(' ')
        print(heading)
        print(' ')
        for label, candidate in labelled_graphs:
            print(label + str(metric(candidate)))
def subcomponent_stats(self, g_bound=10): for g in nx.connected_component_subgraphs(self.graph): if g.order() < g_bound: continue print "g order: ", g.order() print "g size: ", g.order() print "average shortest path length: ", nx.average_shortest_path_length(g) print "path length ratio: ", nx.average_shortest_path_length(g) / g.order() print "clustering coeff: ", nx.average_clustering(g)
def test_clustering(size):
    """Print clustering and average path length for two random graph models.

    A Barabasi-Albert graph (4 edges per new node) and a Watts-Strogatz
    graph (4 neighbours, rewiring probability 0.001) are generated, each
    with ``size`` nodes, and their average clustering coefficient and
    average shortest path length are printed.
    """
    print("Barabasi-Albert:")
    # Use ``size`` here too; a hard-coded 1000 ignored the parameter and
    # made the two models incomparable.
    ba = networkx.barabasi_albert_graph(size, 4)
    print("Clustering: ", networkx.average_clustering(ba))
    print("Average length: ", networkx.average_shortest_path_length(ba))

    print("Watts-Strogatz:")
    ws = networkx.watts_strogatz_graph(size, 4, 0.001)
    print("Clustering: ", networkx.average_clustering(ws))
    print("Average length: ", networkx.average_shortest_path_length(ws))
def test_average_shortest_path(self):
    """Average shortest path length is 2 for the fixture cycle and a 5-path.

    Each graph is checked both with default arguments and with
    ``weighted=True``.
    """
    cases = (
        (self.cycle, {}),
        (self.cycle, {'weighted': True}),
        (nx.path_graph(5), {}),
        (nx.path_graph(5), {'weighted': True}),
    )
    for graph, options in cases:
        length = nx.average_shortest_path_length(graph, **options)
        assert_almost_equal(length, 2)
def test_weighted_average_shortest_path(self):
    """With every edge weight set to 2 the weighted average length is 4.

    Checked on a 7-node cycle and on a 5-node path.
    """
    ring = nx.Graph()
    ring.add_cycle(range(7), weight=2)
    ring_length = nx.average_shortest_path_length(ring, weight=True)
    assert_almost_equal(ring_length, 4)

    chain = nx.Graph()
    chain.add_path(range(5), weight=2)
    chain_length = nx.average_shortest_path_length(chain, weight=True)
    assert_almost_equal(chain_length, 4)
def test_weighted(self):
    """With every edge weight set to 2 the weighted average length is 4.

    Checked on a 7-node cycle and on a 5-node path, using the ``'weight'``
    edge attribute.
    """
    ring = nx.Graph()
    nx.add_cycle(ring, range(7), weight=2)
    ring_length = nx.average_shortest_path_length(ring, weight='weight')
    assert_almost_equal(ring_length, 4)

    chain = nx.Graph()
    nx.add_path(chain, range(5), weight=2)
    chain_length = nx.average_shortest_path_length(chain, weight='weight')
    assert_almost_equal(chain_length, 4)
def gen_graph_stats (graph): G = nx.read_graphml(graph) stats = {} edges, nodes = 0,0 for e in G.edges_iter(): edges += 1 for n in G.nodes_iter(): nodes += 1 stats['Edges'] = (edges,'The number of edges within the Graph') stats['Nodes'] = (nodes, 'The number of nodes within the Graph') print "%i edges, %i nodes" % (edges, nodes) # Accessing the highest degree node center, degree = sorted(G.degree().items(), key=itemgetter(1), reverse=True)[0] stats['Center Node'] = ('%s: %0.5f' % (center,degree),'The center most node in the graph. Which has the highest degree') hairball = nx.subgraph(G, [x for x in nx.connected_components(G)][0]) print "Average shortest path: %0.4f" % nx.average_shortest_path_length(hairball) stats['Average Shortest Path Length'] = (nx.average_shortest_path_length(hairball), '') # print "Center: %s" % G[center] # print "Shortest Path to Center: %s" % p print "Degree: %0.5f" % degree stats['Degree'] = (degree,'The node degree is the number of edges adjacent to that node.') print "Order: %i" % G.number_of_nodes() stats['Order'] = (G.number_of_nodes(),'The number of nodes in the graph.') print "Size: %i" % G.number_of_edges() stats['Size'] = (G.number_of_edges(),'The number of edges in the graph.') print "Clustering: %0.5f" % nx.average_clustering(G) stats['Average Clustering'] = (nx.average_clustering(G),'The average clustering coefficient for the graph.') print "Transitivity: %0.5f" % nx.transitivity(G) stats['Transitivity'] = (nx.transitivity(G),'The fraction of all possible triangles present in the graph.') part = community.best_partition(G) # values = [part.get(node) for node in G.nodes()] # nx.draw_spring(G, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False) # plt.show() mod = community.modularity(part,G) print "modularity: %0.5f" % mod stats['Modularity'] = (mod,'The modularity of a partition of a graph.') knn = nx.k_nearest_neighbors(G) print knn stats['K Nearest Neighbors'] = (knn,'the average degree 
connectivity of graph.\nThe average degree connectivity is the average nearest neighbor degree of nodes with degree k. For weighted graphs, an analogous measure can be computed using the weighted average neighbors degre') return G, stats
def analyze_graph(G):
    """Compute various network metrics for a graph ``G``.

    Returns a dict with these keys; a metric that cannot be computed is
    stored as the string ``"NaN"``:

    * ``"charcount"``: number of nodes
    * ``"edgecount"``: number of edges
    * ``"maxdegree"``: largest node degree
    * ``"avgdegree"``: sum of the degrees divided by the number of nodes
    * ``"density"``: ``nx.density(G)``
    * ``"avgpathlength"``: ``nx.average_shortest_path_length(G)``; when
      that raises ``NetworkXError`` (graph not connected) the value for the
      largest connected component is used instead
    * ``"clustering_coefficient"``: ``nx.average_clustering(G)``
    """
    values = {}
    values["charcount"] = len(G.nodes())
    values["edgecount"] = len(G.edges())

    # dict(...) accepts both the plain dict and the degree view that
    # different networkx versions return from G.degree().
    try:
        values["maxdegree"] = max(dict(G.degree()).values())
    except Exception:
        print("ValueError: max() arg is an empty sequence")
        values["maxdegree"] = "NaN"

    try:
        values["avgdegree"] = sum(dict(G.degree()).values())/len(G.nodes())
    except Exception:
        print("ZeroDivisionError: division by zero")
        values["avgdegree"] = "NaN"

    try:
        values["density"] = nx.density(G)
    except Exception:
        values["density"] = "NaN"

    try:
        values["avgpathlength"] = nx.average_shortest_path_length(G)
    except nx.NetworkXError:
        print("NetworkXError: Graph is not connected.")
        try:
            # Fall back to the giant component.
            giant = G.subgraph(max(nx.connected_components(G), key=len))
            values["avgpathlength"] = nx.average_shortest_path_length(giant)
        except Exception:
            values["avgpathlength"] = "NaN"
    except Exception:
        print("NetworkXPointlessConcept: ('Connectivity is undefined ', 'for the null graph.')")
        # Record the failure under the path-length key; the previous code
        # overwrote "avgdegree" here and left "avgpathlength" unset.
        values["avgpathlength"] = "NaN"

    try:
        values["clustering_coefficient"] = nx.average_clustering(G)
    except Exception:
        print("ZeroDivisionError: float division by zero")
        values["clustering_coefficient"] = "NaN"

    return values
def average_shortest_path(self):
    """Return the average shortest path length(s) of ``self.graph`` as a list.

    If the length can be computed for the whole graph, the list holds that
    single value.  If networkx raises ``NetworkXError`` instead, the list
    holds one value per connected component of the undirected version of
    the graph, skipping components that consist of a single node.
    """
    try:
        return [nx.average_shortest_path_length(self.graph)]
    except nx.NetworkXError:
        # Only build the undirected copy when the fallback is needed.
        undirected = self.graph.to_undirected()
        return [
            nx.average_shortest_path_length(undirected.subgraph(nodes))
            for nodes in nx.connected_components(undirected)
            if len(nodes) != 1
        ]
def get_small_worldness(filename): import networkx as nx threshold = 0 f = open(filename[:-4]+'_small_worldness.dat','w') for i in range(0,101): threshold = float(i)/100 G = get_threshold_matrix(filename, threshold) ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G)) cluster = nx.average_clustering(G) ER_cluster = nx.average_clustering(ER_graph) transi = nx.transitivity(G) ER_transi = nx.transitivity(ER_graph) print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' %(threshold, cluster, ER_cluster, transi, ER_transi) f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster)) components = nx.connected_component_subgraphs(G) ER_components = nx.connected_component_subgraphs(ER_graph) values = [] ER_values = [] for i in range(len(components)): if nx.number_of_nodes(components[i]) > 1: values.append(nx.average_shortest_path_length(components[i])) for i in range(len(ER_components)): if nx.number_of_nodes(ER_components[i]) > 1: ER_values.append(nx.average_shortest_path_length(ER_components[i])) if len(values) == 0: f.write("\t0.") else: f.write("\t%f" % (sum(values)/len(values))) if len(ER_values) == 0: f.write("\t0.") else: f.write("\t%f" % (sum(ER_values)/len(ER_values))) f.write("\t%f\t%f" % (transi, ER_transi)) if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 : S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values))) else: S_WS = 0. if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 : S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values))) else: S_Delta = 0. f.write("\t%f\t%f" % (S_WS, S_Delta)) f.write("\n") f.close() print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity"
def evaluator(G): calc = list() ev1 = nx.average_clustering(G) if nx.is_connected(G) == True: ev2 = nx.average_shortest_path_length(G) else: for sub in nx.connected_component_subgraphs(G): if len(sub.nodes()) > 1: calc.append(nx.average_shortest_path_length(sub)) ev2 = sum(calc)/len(calc) print 'Average clustering and average shortest path length coefficients:', (ev1, ev2)
def get_average_shortest_path_len(syst, mat):
    """Return the average shortest path length of the graph of ``syst.jacobian``.

    If networkx raises ``NetworkXError`` for the whole graph, the mean over
    the connected components is returned instead; ``None`` is returned when
    that computation raises ``ZeroDivisionError``.  ``mat`` is not used.
    """
    jacobian_graph = nx.from_numpy_matrix(syst.jacobian)
    try:
        return nx.average_shortest_path_length(jacobian_graph)
    except nx.exception.NetworkXError:
        pass

    # Fallback: average the per-component values.
    try:
        component_lengths = []
        for component in nx.connected_component_subgraphs(jacobian_graph):
            component_lengths.append(nx.average_shortest_path_length(component))
        return np.mean(component_lengths)
    except ZeroDivisionError:
        return None
def test_lattice_reference():
    """Check ``lattice_reference`` on valid, empty, directed and split graphs.

    * The result for a connected Watts-Strogatz graph must have a larger
      average shortest path length than the input graph.
    * An empty graph raises ``NetworkXError``; a directed graph raises
      ``NetworkXNotImplemented``.
    * A graph made of two separate edges is processed without any check
      on the result.
    """
    source = nx.connected_watts_strogatz_graph(50, 6, 1, seed=rng)
    latticized = lattice_reference(source, niter=1, seed=rng)
    source_length = nx.average_shortest_path_length(source)
    lattice_length = nx.average_shortest_path_length(latticized)
    assert_true(lattice_length > source_length)

    assert_raises(nx.NetworkXError, lattice_reference, nx.Graph())
    assert_raises(nx.NetworkXNotImplemented, lattice_reference, nx.DiGraph())

    two_edges = nx.Graph(((0, 1), (2, 3)))
    lattice_reference(two_edges, niter=1)
def make_graph(self,save_graph=True): graph = nx.DiGraph() all_tweets = [tweet for page in self.results for tweet in page['results']] for tweet in all_tweets: rt_sources = self.get_rt_sources(tweet["text"]) if not rt_sources: continue for rt_source in rt_sources: graph.add_edge(rt_source, tweet["from_user"], {"tweet_id": tweet["id"]}) #--Calculate graph summary statistics if nx.is_connected(graph.to_undirected()): diameter = nx.diameter(graph.to_undirected()) average_shortest_path = nx.average_shortest_path_length(graph.to_undirected()) print 'Diameter: ', diameter print 'Average Shortest Path: ',average_shortest_path else: print "Graph is not connected so calculating the diameter and average shortest path length on all connected components." diameter = [] average_shortest_path = [] for subgraph in nx.connected_component_subgraphs(graph.to_undirected()): diameter.append(nx.diameter(subgraph)) average_shortest_path.append(nx.average_shortest_path_length(subgraph)) from numpy import median from scipy.stats import scoreatpercentile print 'Diameter: ',median(diameter),u'\xB1',str(scoreatpercentile(diameter,75)-scoreatpercentile(diameter,25)) print 'Average Path Length :',median(average_shortest_path),u'\xB1',str(scoreatpercentile(average_shortest_path,75)-scoreatpercentile(average_shortest_path,25)) degree_sequence=sorted(nx.degree(graph).values(),reverse=True) # degree sequence import matplotlib.pyplot as plt plt.loglog(degree_sequence,'b-',marker='o') plt.title("Distribution of Degrees for %s tweets" %(self.drug_name), fontsize=20) plt.ylabel("Degree", fontsize=20) plt.xlabel("Rank", fontsize=20) # draw graph in inset ax = plt.axes([0.35,0.25,0.55,0.55]) plt.axis('off') nx.draw(graph, ax=ax, alpha=0.8, with_labels=False) plt.savefig("degree_distribution_%s.png"%(self.drug_name.replace(' ','_')), dpi=300) plt.close() if save_graph: output_file = self.drug_name.replace(' ','_') + '.dot' try: nx.drawing.write_dot(graph,output_file) print 'Graph saved as ',output_file 
except (ImportError, UnicodeEncodeError) as e: dot = ['"%s" -> "%s" [tweetid=%s]' % (node1,node2,graph[node1][node2]['tweet_id']) for node1,node2, in graph.edges()] with codecs.open(output_file,'w', encoding='utf-8') as f: f.write('strict digraph G{\n%s\n}' % (';\n'.join(dot),)) print 'Saved ',output_file,' by brute force' return diameter, average_shortest_path
def compute_measures(bigDict):
    """Compute scalar measures for every network in ``bigDict``.

    ``bigDict`` maps a key to a network.  The result maps the same key to
    a dict with these entries:

        nr_of_nodes
        nr_of_edges
        max_edge_value
        min_edge_value
        is_connected
        number_connected_components
        average_unweighted_node_degree
        average_weighted_node_degree
        average_clustering_coefficient
        average_weighted_shortest_path_length
        average_unweighted_shortest_path_length

    Not computed yet: single node values (e.g. the node degree of the
    brainstem) and the non-scalar degree and edge-weight distributions.
    """
    returnMeasures = {}
    for key, netw in bigDict.items():
        # Gather the edge weights once for both the max and the min.
        edge_weights = [attrs['weight'] for _, _, attrs in netw.edges(data=True)]
        returnMeasures[key] = {
            'nr_of_nodes': netw.number_of_nodes(),
            'nr_of_edges': netw.number_of_edges(),
            'max_edge_value': np.max(edge_weights),
            'min_edge_value': np.min(edge_weights),
            'is_connected': nx.is_connected(netw),
            'number_connected_components': nx.number_connected_components(netw),
            'average_unweighted_node_degree': np.mean(nx.degree(netw, weighted = False).values()),
            'average_weighted_node_degree': np.mean(nx.degree(netw, weighted = True).values()),
            'average_clustering_coefficient': nx.average_clustering(netw),
            'average_weighted_shortest_path_length': nx.average_shortest_path_length(netw, weighted = True),
            'average_unweighted_shortest_path_length': nx.average_shortest_path_length(netw, weighted = False),
        }
    return returnMeasures
def myavgpathlength(G):
    """Return the average shortest path length of ``G`` in a one-item list.

    If networkx raises ``NetworkXError`` (the graph is not connected), the
    value of the largest component is returned instead: the largest
    strongly connected component for a ``DiGraph``, the largest connected
    component otherwise.  If the computation for the whole graph raises
    ``ZeroDivisionError``, ``[1]`` is returned.
    """
    try:
        return [nx.average_shortest_path_length(G)]
    except nx.NetworkXError:
        # This means the graph is not connected.
        if isinstance(G, nx.DiGraph):
            components = nx.strongly_connected_components(G)
        else:
            components = nx.connected_components(G)
        # Choose the largest component explicitly.  Indexing the component
        # list with [0] relied on networkx returning a list sorted largest
        # first, which generator-based versions do not do.
        largest = G.subgraph(max(components, key=len))
        return [nx.average_shortest_path_length(largest)]
    except ZeroDivisionError:
        return [1]
def get_small_worldness(G, thr):
    """Append small-worldness measures of ``G`` at threshold ``thr`` to files.

    ``G`` is compared with an Erdos-Renyi graph of the same node count and
    density.  One tab-separated row is appended to each of two files whose
    names start with the module-level ``out_prfx``:

    * ``cc_trans_ER.dat``: threshold, clustering and transitivity of ``G``
      and of the random graph;
    * ``small_worldness.dat``: threshold, both clustering values, both mean
      path lengths, both transitivities, and the two small-worldness
      ratios (clustering-based and transitivity-based).

    Path lengths are averaged over connected components with more than one
    node.  ``0.`` is written when there is no such component.
    """
    # ``with`` closes both files even if a computation below fails.
    with open(out_prfx + 'small_worldness.dat', 'a') as f, \
            open(out_prfx + 'cc_trans_ER.dat', 'a') as g:
        # Erdos-Renyi (binomial) random graph with N nodes and density D.
        ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))
        cluster = nx.average_clustering(G)  # clustering coef. of whole network
        ER_cluster = nx.average_clustering(ER_graph)  # cc of random graph
        transi = nx.transitivity(G)
        ER_transi = nx.transitivity(ER_graph)

        g.write("%f\t%f\t%f\t%f\t%f\n" % (thr, cluster,ER_cluster,transi,ER_transi ))
        f.write("%f\t%f\t%f" % (thr, cluster, ER_cluster))

        # Iterate the components directly: works whether networkx returns a
        # list or a generator.
        values = [nx.average_shortest_path_length(component)
                  for component in nx.connected_component_subgraphs(G)
                  if nx.number_of_nodes(component) > 1]
        ER_values = [nx.average_shortest_path_length(component)
                     for component in nx.connected_component_subgraphs(ER_graph)
                     if nx.number_of_nodes(component) > 1]

        if len(values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(values)/len(values)))  # path length
        if len(ER_values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(ER_values)/len(ER_values)))
        f.write("\t%f\t%f" % (transi, ER_transi))

        # The ratios are only formed when every factor is non-zero.
        if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
            S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
        else:
            S_WS = 0.
        if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
            S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
        else:
            S_Delta = 0.
        f.write("\t%f\t%f" % (S_WS, S_Delta))  # S_WS ~ small worldness
        f.write("\n")
def eigenvector_apl(g, recalculate=False):
    """Track the average path length while removing the most central nodes.

    Nodes are removed from ``g`` in decreasing order of eigenvector
    centrality.  With ``recalculate=True`` the centrality ranking is
    recomputed after every removal (sequential approach); with
    ``recalculate=False`` the initial ranking is used throughout
    (simultaneous approach).  ``g`` itself is modified: nodes are removed
    from it in place.

    After each removal the average shortest path length is averaged over
    the connected components that have more than one node (0.0 when there
    is no such component) and divided by the initial value.

    Returns ``(x, y, r)`` where ``x[i]`` is the step number divided by the
    initial average path length, ``y[i]`` is the normalised average path
    length after that step, and ``r`` is the sum of ``y`` divided by the
    initial average path length.
    """
    m = networkx.eigenvector_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []

    # Baseline: mean of the per-component average path lengths.
    average_path_length = 0.0
    number_of_components = 0
    n = len(g.nodes())
    for sg in networkx.connected_component_subgraphs(g):
        average_path_length += networkx.average_shortest_path_length(sg)
        number_of_components += 1
    average_path_length /= number_of_components
    initial_apl = average_path_length

    r = 0.0
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            try:
                m = networkx.eigenvector_centrality(g, max_iter=5000)
            except networkx.NetworkXError:
                # Stop when the centrality can no longer be computed.
                break
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)

        average_path_length = 0.0
        number_of_components = 0
        for sg in networkx.connected_component_subgraphs(g):
            if len(sg.nodes()) > 1:
                average_path_length += networkx.average_shortest_path_length(sg)
                number_of_components += 1
        # When only isolated nodes remain there is nothing to average;
        # keep 0.0 instead of dividing by zero.
        if number_of_components:
            average_path_length = average_path_length / number_of_components

        # NOTE(review): x is scaled by initial_apl, not by the node count n;
        # confirm this is intended if x is meant to be the fraction of
        # nodes removed.
        x.append(i * 1. / initial_apl)
        r += average_path_length * 1. / initial_apl
        y.append(average_path_length * 1. / initial_apl)
    return x, y, r / initial_apl
def draw_connected_components(g): # get the connected components cc = nx.connected_components(g) i = 0 for c in cc: # extract the subgraph identifying the actual component sub = g.subgraph(c) # plot only components having at least 3 nodes if len(sub) > 3: nx.draw(sub) plt.show() print (len(sub)) print nx.average_shortest_path_length(sub)
def compute_singlevalued_measures(ntwk, weighted=True, calculate_cliques=False):
    """Return a dict holding one scalar value per measure for ``ntwk``.

    The average shortest path length is computed on the whole network when
    it is connected, otherwise on the first connected component subgraph.
    The graph clique number is only computed when ``calculate_cliques`` is
    true.
    """
    iflogger.info('Computing single valued measures:')
    measures = {}

    iflogger.info('...Computing degree assortativity (pearson number) ...')
    try:
        pearson = nx.degree_pearsonr(ntwk)
    except AttributeError:  # For NetworkX 1.6
        pearson = nx.degree_pearson_correlation_coefficient(ntwk)
    measures['degree_pearsonr'] = pearson

    iflogger.info('...Computing degree assortativity...')
    try:
        assortativity = nx.degree_assortativity(ntwk)
    except AttributeError:
        assortativity = nx.degree_assortativity_coefficient(ntwk)
    measures['degree_assortativity'] = assortativity

    # Measures that are a single networkx call on the network.
    direct_measures = (
        ('...Computing transitivity...', 'transitivity', nx.transitivity),
        ('...Computing number of connected_components...',
         'number_connected_components', nx.number_connected_components),
        ('...Computing graph density...', 'graph_density', nx.density),
        ('...Recording number of edges...', 'number_of_edges',
         nx.number_of_edges),
        ('...Recording number of nodes...', 'number_of_nodes',
         nx.number_of_nodes),
        ('...Computing average clustering...', 'average_clustering',
         nx.average_clustering),
    )
    for message, key, compute in direct_measures:
        iflogger.info(message)
        measures[key] = compute(ntwk)

    connected = nx.is_connected(ntwk)
    iflogger.info('...Calculating average shortest path length...')
    if connected:
        target = ntwk
    else:
        target = nx.connected_component_subgraphs(ntwk)[0]
    measures['average_shortest_path_length'] = \
        nx.average_shortest_path_length(target, weighted)

    if calculate_cliques:
        iflogger.info('...Computing graph clique number...')
        # out of memory error
        measures['graph_clique_number'] = nx.graph_clique_number(ntwk)
    return measures
def whole_graph_metrics(graph, weighted=False):
    """Compute whole-graph summary metrics for ``graph``.

    Returns a dict with the keys ``'avg_shortest_path'``,
    ``'avg_eccentricity'``, ``'avg_ccoeff'``, ``'avg_node_btwn'``,
    ``'avg_edge_btwn'`` and ``'isolates'`` (the number of isolated nodes).

    NOTE(review): ``weighted`` is forwarded unchanged as the ``weight``
    argument of the networkx calls - confirm that callers pass what
    networkx expects there.
    """
    graph_metrics = {}

    # Shortest average path length
    graph_metrics['avg_shortest_path'] = \
        nx.average_shortest_path_length(graph, weight=weighted)

    # Average eccentricity.  list(...) is needed because dict.values() is a
    # view on Python 3, which np.array would not turn into a numeric array.
    ecc_dict = nx.eccentricity(graph)
    graph_metrics['avg_eccentricity'] = np.mean(np.array(list(ecc_dict.values())))

    # Average clustering coefficient
    # NOTE: Option to include or exclude zeros
    graph_metrics['avg_ccoeff'] = \
        nx.average_clustering(graph, weight=weighted, count_zeros=True)

    # Average node betweenness
    avg_node_btwn_dict = nx.betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_node_btwn'] = \
        np.mean(np.array(list(avg_node_btwn_dict.values())))

    # Average edge betweenness
    avg_edge_btwn_dict = nx.edge_betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_edge_btwn'] = \
        np.mean(np.array(list(avg_edge_btwn_dict.values())))

    # Number of isolates; list(...) also handles a generator result.
    graph_metrics['isolates'] = len(list(nx.isolates(graph)))

    return graph_metrics
def get_single_network_measures(G, thr):
    """Append one row of basic measures of ``G`` to the measures file.

    The row is appended to ``out_prfx + 'single_network_measures.dat'`` and
    holds, tab-separated:

    1. threshold ``thr``
    2. number of edges
    3. density
    4. average clustering coefficient
    5. average degree
    6. number of connected components
    7. average shortest path length, averaged over the connected
       components with more than one node (``0.`` if there is none)
    """
    # ``with`` closes the file even if a computation below fails.
    with open(out_prfx + 'single_network_measures.dat', 'a') as f:
        N = nx.number_of_nodes(G)
        L = nx.number_of_edges(G)
        D = nx.density(G)
        cc = nx.average_clustering(G)
        compon = nx.number_connected_components(G)

        ave_deg = float(sum(G.degree(node) for node in G)) / float(N)
        f.write("%f\t%d\t%f\t%f\t%f\t%f\t" % (thr, L, D, cc, ave_deg, compon))

        # Iterate the components directly: works whether networkx returns a
        # list or a generator.
        values_2 = [nx.average_shortest_path_length(component)
                    for component in nx.connected_component_subgraphs(G)
                    if nx.number_of_nodes(component) > 1]
        if len(values_2) == 0:
            f.write("0.\n")
        else:
            f.write("%f\n" % (sum(values_2)/len(values_2)))
def printStats(filename): ''' Converts json adjacency list into networkx to calculate and print the graphs's - average clustering coefficient - overall clustering coefficient - maximum diameter - average diameter - number of paritions using community.best_parition - modularity of community.best_partition ''' g = makeGraphFromJSON(filename) print "Average Clustering Coefficient: %f" % nx.average_clustering(g) print "Overall Clustering Coefficient: %f" % nx.transitivity(g) connected_subgraphs = list(nx.connected_component_subgraphs(g)) largest = max(nx.connected_component_subgraphs(g), key=len) print "# Connected Components: %d" % len(connected_subgraphs) print " Maximal Diameter: %d" % nx.diameter(largest) print " Average Diameter: %f" % nx.average_shortest_path_length(largest) # Find partition that maximizes modularity using Louvain's algorithm part = community.best_partition(g) print "# Paritions: %d" % (max(part.values()) + 1) print "Louvain Modularity: %f" % community.modularity(part, g)
def compute_smoothness(G):
    """Print the smoothness error of the tree ``G``.

    Smoothness is the difference between the distance on the graph and the
    Euclidean distance of a pair of leaves.  The differences of all leaf
    pairs are summed, each multiplied by ``e ** -L`` where ``L`` is the
    average shortest path length of ``G``.  The total is printed; nothing
    is returned.
    """
    leaves = extract_leaves(G)
    sp_avg_len = nx.average_shortest_path_length(G)
    # The penalty is the same for every pair, so compute it once.
    penalization = math.e ** (-sp_avg_len)

    total_error = 0
    for s_index, s_id in enumerate(leaves):
        s = G.node[s_id]
        # Visit each unordered pair of leaves exactly once.
        for t_id in leaves[s_index + 1:]:
            t = G.node[t_id]
            curr_distance = compute_euclidean_distance(s, t)
            path_distance = compute_distance_on_graph(G, s_id, t_id)
            total_error += (path_distance - curr_distance) * penalization
    print(total_error)
def analyse_data(data, threshold=0.66):
    """Run the graph theory analysis on every dataset in ``data``.

    Parameters
    ----------
    data: dict
        maps a dataset name to a dict whose ``'corr'`` entry is the
        correlation matrix from which the graph is derived
    threshold: float
        passed on to ``corr_matrix_to_graph``

    Returns
    -------
    result: dict
        maps each dataset name to a dict with
        'L' - the average shortest path length
        'CC' - the average clustering coefficient
        'DD' - the degree histogram
        'Nodes' - the number of nodes in the graph
        'Edges' - the number of edges in the graph
    """
    result = {}
    for label, dataset in data.items():
        graph, _ = corr_matrix_to_graph(dataset['corr'], threshold=threshold)
        result[label] = {
            'L': nx.average_shortest_path_length(graph),
            'CC': nx.average_clustering(graph),
            'DD': nx.degree_histogram(graph),
            'Nodes': graph.number_of_nodes(),
            'Edges': graph.number_of_edges(),
        }
    return result
def ws_calc(path):
    """
    Given a path to a graph file generated by the GMM, calculate C(p)
    and L(p).

    ``p`` is read from the fifth ``_``-separated field of ``path``.
    Returns a dict with the keys ``'p'``, ``'cc'`` (average clustering)
    and ``'avg.pl'`` (average shortest path length).
    """
    G = nx.read_graphml(path)
    name_fields = path.split('_')
    p_value = float(name_fields[4])
    clustering = nx.average_clustering(G)
    path_length = nx.average_shortest_path_length(G)
    return {'p': p_value, 'cc': clustering, 'avg.pl': path_length}
def answer_seven():
    """Return the average shortest path length of the graph from ``answer_six``."""
    graph = answer_six()
    avg_length = nx.average_shortest_path_length(graph)
    return avg_length
import numpy as np
import matplotlib.mlab as mlab
import math
from scipy.stats import norm

# Normalise the externally supplied settings to strings.
Folder_to_load_from= '{}'.format(Folder_to_load_from)
Folder_to_save_graphs=str(Folder_to_save_graphs)
Name_of_stat_file='{}'.format(Name_of_stat_file)

stats={}

#create graph
G=ox.save_load.load_graphml(Graphml_File_Name, folder=Folder_to_load_from)

#average shortest path length of graph
stats['average_shortest_path_length']=nx.average_shortest_path_length(G,weight='length')

# find the all_pairs_dijkstra_path_length
# Materialise the result as a dict and bind it to ``length``: the sums below
# read ``length``, which was never assigned, and the networkx result may be
# a one-shot iterator of (node, distances) pairs.
length=dict(nx.all_pairs_dijkstra_path_length(G, cutoff=15000, weight='length'))
stats['all_pairs_dijkstra_path_length']=length

# Per node: sum of the shortest-path lengths to every node within the cutoff.
shortlengthsum={key: sum(value.values()) for key, value in length.items()}
Total=sum(shortlengthsum.values())
N=G.number_of_nodes()
Mean_length_shortest=float(Total)/N
# Each node's sum relative to the mean sum over all nodes.
dict2 = {key:float(value)/Mean_length_shortest for key, value in shortlengthsum.items()}