def measure_between_group_connectivity(induced, name='', plots=False):
    overall_conn = approx.node_connectivity(induced)
    print('Minimum number of nodes that must be removed to disconnect students: ' + str(overall_conn))
    pairwise_conn = approx.all_pairs_node_connectivity(induced)
    avg_cluster = approx.average_clustering(induced)
    print('Mean of the fraction of triangles that actually exist over all possible triangles in each neighborhood: ' + str(avg_cluster))
    if plots:
        p.pairwise_conn_dist(pairwise_conn, name)
def build_graph():
    """Generate a graph representation of the training facts."""
    # G = nx.DiGraph()
    G = nx.Graph()
    with open('data/initial_label_facts.json') as f_in:
        label_triples = json.load(f_in)
    rel_colors = {
        "use": "red",
        "different from": "green",
        "subclass of": "blue",
        "has quality": "purple",
        "instance of": "yellow",
        "facet of": "brown",
    }
    for rel, vals in label_triples.items():
        for pair in vals:
            G.add_edge(pair[0], pair[1], color=rel_colors.get(rel, 'black'))
    # print(len(list(nx.connected_components(G))))
    # print(sorted(d for n, d in G.degree()))
    # print(nx.clustering(G))
    print(approximation.average_clustering(G))
    # write_dot(G, 'test.dot')
    # $ dot -Tpng test.dot >test.png
    return label_triples
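# A hypothetical minimal fixture for build_graph(): the JSON file is assumed
# to map each relation name to a list of [head, tail] pairs (this shape is
# inferred from the loop above, not taken from the original repo). Relations
# missing from rel_colors fall back to black edges.
import json
import os

os.makedirs('data', exist_ok=True)
example_facts = {
    "subclass of": [["poodle", "dog"], ["dog", "animal"]],
    "different from": [["dog", "cat"]],
}
with open('data/initial_label_facts.json', 'w') as f_out:
    json.dump(example_facts, f_out, indent=2)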
def measure_connectivity(G, grouping=None):
    overall_conn = approx.node_connectivity(G)
    print('Minimum number of nodes that must be removed to disconnect students: ' + str(overall_conn))
    pairwise_conn = approx.all_pairs_node_connectivity(G)
    plt.title('Distribution of Min Removed Nodes to Disconnect Pair')
    # all_pairs_node_connectivity returns a dict of dicts, so flatten the
    # inner values before histogramming.
    plt.hist([k for inner in pairwise_conn.values() for k in inner.values()])
    avg_cluster = approx.average_clustering(G)
    print('Mean of the fraction of triangles that actually exist over all possible triangles in each neighborhood: ' + str(avg_cluster))
def _extract_features_for_subgraph(self, graph):
    res = {}
    deg_list = [i[1] for i in nx.degree(graph)]
    weights_list = [graph[edge[0]][edge[1]]['weight'] for edge in graph.edges]
    res['connected'] = [1 if nx.is_connected(graph) else 0]
    res['density'] = ['{:.6f}'.format(nx.density(graph))]
    res['Avg_CC'] = [aprox.average_clustering(graph)]
    res['Median_deg'] = ['{:.6f}'.format(np.median(deg_list))]
    res['Variance_deg'] = ['{:.6f}'.format(np.var(deg_list))]
    res['Median_wights'] = ['{:.6f}'.format(np.median(weights_list) if len(weights_list) > 0 else -1)]
    res['Variance_wights'] = ['{:.6f}'.format(np.var(weights_list) if len(weights_list) > 0 else 0)]
    res['Avg_degree'] = ['{:.6f}'.format(sum(deg_list) / len(nx.degree(graph)))]
    res['Avg_weight'] = ['{:.6f}'.format(sum(weights_list) / len(weights_list) if len(weights_list) > 0 else -1)]
    res['Avg_weight_abs'] = ['{:.6f}'.format(abs(sum(weights_list) / len(weights_list) if len(weights_list) > 0 else -1))]
    res['edges'] = [len(graph.edges)]
    res['nodes'] = [len(graph.nodes)]
    res['self_loops'] = [len(list(nx.nodes_with_selfloops(graph)))]
    res['edge_to_node_ratio'] = ['{:.6f}'.format(len(graph.nodes) / len(graph.edges) if len(graph.edges) > 0 else len(graph.nodes))]
    res['negative_edges'] = [len([edge for edge in graph.edges if graph[edge[0]][edge[1]]['weight'] < 0])]
    # Count near-zero weights; the original `abs(weight > 0)` took the
    # absolute value of a boolean comparison, which was almost certainly
    # not the intent.
    res['Num_of_zero_weights'] = [len([e for e in graph.edges if abs(graph[e[0]][e[1]]['weight']) < 0.005])]
    res['min_vc'] = [len(aprox.min_weighted_vertex_cover(graph))]
    for key in res.keys():
        res[key] = [float(res[key][0])]
    return res
def main():
    fb = read_graph('facebook_combined.txt.gz')
    fb_clustering = average_clustering(fb)
    fb_length = estimate_path_length(fb)
    n = len(fb)
    m = len(fb.edges())
    k = int(round(m / n))
    hk = hk_graph_modified(n, 1)
    # generate_pmf(fb, hk)
    # generate_cdf(fb, hk)
    generate_ccdf(fb, hk)
    print("Degrees:", len(degrees(fb)), len(degrees(hk)))
    print("Clustering:", fb_clustering, average_clustering(hk))
    print("Path length:", fb_length, estimate_path_length(hk))
    print("Mean degrees:", np.mean(degrees(fb)), np.mean(degrees(hk)))
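# Several of these snippets call a small `degrees` helper that is not shown.
# A minimal sketch consistent with how it is used above (the name and
# signature come from the calls; the body is an assumption):
def degrees(G):
    """Return the list of node degrees of G."""
    return [d for _, d in G.degree()]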
def analyze_clustering(G):
    average_clustering_coefficient = approximation.average_clustering(G)
    average_clustering = nx.average_clustering(G)
    average_shortest_path_length = nx.average_shortest_path_length(G)
    local_efficiency = nx.local_efficiency(G)
    global_efficiency = nx.global_efficiency(G)
    table = prettytable.PrettyTable(
        ['Average clustering', 'Average clustering coefficient',
         'Average shortest path length'])
    table.add_row([average_clustering, average_clustering_coefficient,
                   average_shortest_path_length])
    print(table)
    table = prettytable.PrettyTable(['Local efficiency', 'Global efficiency'])
    table.add_row([local_efficiency, global_efficiency])
    print(table)
def extract_graph_features(self, graph):
    """
    ref: https://networkx.github.io/documentation/stable/_modules/networkx/algorithms/approximation/vertex_cover.html
    ref: https://networkx.github.io/documentation/stable/reference/algorithms/approximation.html#module-networkx.algorithms.approximation
    """
    res = {}
    deg_list = [i[1] for i in nx.degree(graph)]
    weights_list = [graph[edge[0]][edge[1]]['weight'] for edge in graph.edges]
    if len(weights_list) == 0:
        return None
    # try:
    #     weights_list = [graph[edge[0]][edge[1]]['weight'] for edge in graph.edges]
    # except:
    #     return None
    res['connected'] = 1 if nx.is_connected(graph) else 0
    res['density'] = '{:.6f}'.format(nx.density(graph))
    res['Avg_CC'] = aprox.average_clustering(graph)
    res['Median_deg'] = '{:.6f}'.format(np.median(deg_list))
    res['Variance_deg'] = '{:.6f}'.format(np.var(deg_list))
    res['Median_wights'] = '{:.6f}'.format(np.median(weights_list))
    res['Variance_wights'] = '{:.6f}'.format(np.var(weights_list))
    res['Avg_degree'] = '{:.6f}'.format(sum(deg_list) / len(nx.degree(graph)))
    res['Avg_weight'] = '{:.6f}'.format(sum(weights_list) / len(weights_list))
    res['Avg_weight_abs'] = '{:.6f}'.format(abs(sum(weights_list) / len(weights_list)))
    res['edges'] = len(graph.edges)
    res['nodes'] = len(graph.nodes)
    res['self_loops'] = len(list(nx.nodes_with_selfloops(graph)))
    res['edge_to_node_ratio'] = '{:.6f}'.format(len(graph.nodes) / len(graph.edges))
    res['negative_edges'] = len([edge for edge in graph.edges if graph[edge[0]][edge[1]]['weight'] < 0])
    # Count near-zero weights; the original `abs(weight > 0)` applied abs()
    # to a boolean comparison, which was almost certainly not the intent.
    res['Num_of_zero_weights'] = len([e for e in graph.edges if abs(graph[e[0]][e[1]]['weight']) < 0.005])
    res['min_vc'] = len(aprox.min_weighted_vertex_cover(graph))
    return res
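# A quick, hypothetical smoke test for extract_graph_features on a tiny
# weighted graph. `self` is unused in the body, so passing None works for a
# standalone check; in the original repo this is presumably a method on some
# extractor class (the fixture below is an assumption).
import networkx as nx

g = nx.Graph()
g.add_edge('a', 'b', weight=0.8)
g.add_edge('b', 'c', weight=-0.3)
g.add_edge('a', 'c', weight=0.001)
print(extract_graph_features(None, g))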
def analyze_graph(G):
    G.graph['directed'] = nx.is_directed(G)
    G_und = G.to_undirected()
    G.graph['connected_components'] = nx.number_connected_components(G_und)
    G.graph['largest_component'] = len(max(nx.connected_components(G_und), key=len))
    logging.info("Graph ID {}: components analyzed.".format(G.graph['graph_id']))
    G.graph['average_clustering'] = approximation.average_clustering(G_und)
    logging.info("Graph ID {}: clustering analyzed.".format(G.graph['graph_id']))
    degrees = [d for n, d in G.degree()]
    # The original assigned a (min, max, mean, median) tuple here; store only
    # the minimum, since the other statistics get their own keys below.
    G.graph['min_degree'] = min(degrees)
    G.graph['max_degree'] = max(degrees)
    G.graph['avg_degree'] = np.mean(degrees)
    G.graph['std_degree'] = np.std(degrees)
    G.graph['median_degree'] = np.median(degrees)
    logging.info("Graph ID {}: degrees analyzed.".format(G.graph['graph_id']))
def analyze_graph(G, verbose=False):
    """Analyzes G for relevant characteristics.

    G: NetworkX graph
    verbose: print characteristics if True

    returns:
        n: number of nodes in G
        m: number of edges in G
        k: int of average degree (edges per node)
        degs: list of degrees of G
    """
    n = len(G)
    m = len(G.edges())
    k = int(round(m / n))
    C = average_clustering(G)
    L = estimate_path_length(G)
    degs = degrees(G)
    if verbose:
        print('n: %i m: %i k: %i' % (n, m, k))
        print('clustering: ', C, 'path length: ', L)
        print('average degree: %.2f degree variance: %.2f' % (np.mean(degs), np.var(degs)))
    return n, m, k, degs
def measure_connectivity(G, out, grouping=None):
    # overall_conn = approx.node_connectivity(G)
    # print('Minimum number of nodes that must be removed to disconnect students: ' + str(overall_conn))
    pairwise_conn = approx.all_pairs_node_connectivity(G)
    plt.title('Distribution of Min Removed Nodes to Disconnect Pair')
    connlist = []
    for subdict in pairwise_conn.values():
        avgconn = sum(list(subdict.values())) / len(subdict.values())
        connlist.append(avgconn)
    temp = pd.DataFrame(connlist, columns=['pairwise_conn'])
    temp = temp.reset_index()
    temp["rank"] = temp['pairwise_conn'].rank(method='average', ascending=False)
    temp = temp.sort_values(by=['index'], ascending=False)
    plt.scatter(y=temp['pairwise_conn'], x=temp['rank'], alpha=0.7)
    plt.xlabel('Rank')
    plt.ylabel('Average pairwise connectivity')  # the y-axis plots connectivity, not a count
    plt.savefig(out + 'zipf_pairwiseconnrank.png')
    plt.close()
    avg_cluster = approx.average_clustering(G)
    print('Mean of the fraction of triangles that actually exist over all possible triangles in each neighborhood: ' + str(avg_cluster))
def test_complete():
    G = nx.complete_graph(5)
    assert average_clustering(G, trials=int(len(G) / 2)) == 1
    G = nx.complete_graph(7)
    assert average_clustering(G, trials=int(len(G) / 2)) == 1
def test_empty():
    G = nx.empty_graph(5)
    assert average_clustering(G, trials=int(len(G) / 2)) == 0
def test_dodecahedral():
    # Actual coefficient is 0
    G = nx.dodecahedral_graph()
    assert (average_clustering(G, trials=int(len(G) / 2))
            == nx.average_clustering(G))
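# Why these triangle-free tests pass deterministically: each trial of the
# approximation samples a random node, picks two of its neighbors, and records
# 1 iff they are adjacent; the estimate is the fraction of 1s. The
# dodecahedral and Petersen graphs contain no triangles, so every trial
# records 0 and the approximation equals the exact coefficient (0) for any
# number of trials. A quick check:
import networkx as nx
from networkx.algorithms.approximation import average_clustering

G = nx.petersen_graph()
assert all(average_clustering(G, trials=3, seed=s) == 0 for s in range(5))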
    return lengths  # tail of sample_path_lengths (defined above this excerpt)

def estimate_path_length(G, nodes=None, trials=1000):
    """Estimates the average shortest path length of a given graph."""
    return np.mean(sample_path_lengths(G, nodes, trials))

fb = read_graph('facebook_combined.txt.gz')
n = len(fb)
m = len(fb.edges())
# Each edge contributes to the degree of both of its endpoints, so the sum of
# all degrees is 2m and the mean degree is 2m / n.
k_fb = int(round(2 * m / n))
print(f"fb Graph nodes = {n}, edges = {m}")
C_fb = average_clustering(fb)
L_fb = estimate_path_length(fb)

# Now construct a WS graph with the same n and k; Downey found by trial and
# error that with p = 0.05 the C and L values are comparable.
ws = nx.watts_strogatz_graph(n, k_fb, 0.05, seed=15)
print(f"Constructing Watts-Strogatz Graph, WS({n}, {k_fb}, 0.05)")
C_ws = average_clustering(ws)
L_ws = estimate_path_length(ws)

print("graph \t n \t C \t L \t mu_k \t sigma_k")
print(f"fb \t {n} \t {C_fb} \t {L_fb} \t "
      f"{np.mean(degrees(fb)):.1f} \t {np.std(degrees(fb)):.1f}")
print(f"WS \t {n} \t {C_ws} \t {L_ws} \t "
      f"{np.mean(degrees(ws)):.1f} \t {np.std(degrees(ws)):.1f}")

# Now use probability mass function objects to check the probability that a
# node has a particular degree.
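# The Think Complexity excerpts here open at the tail of sample_path_lengths.
# A minimal sketch of that helper, modeled on Downey's book (the exact body
# is an assumption): sample `trials` random node pairs and collect their
# shortest-path distances.
import random
import networkx as nx

def sample_path_lengths(G, nodes=None, trials=1000):
    """Estimate shortest-path lengths between `trials` random node pairs."""
    nodes = list(G) if nodes is None else list(nodes)
    pairs = [random.sample(nodes, 2) for _ in range(trials)]
    lengths = [nx.shortest_path_length(G, *pair) for pair in pairs]
    return lengths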
def test_dodecahedral():
    # Actual coefficient is 0
    G = nx.dodecahedral_graph()
    assert_equal(average_clustering(G, trials=int(len(G) / 2)),
                 nx.average_clustering(G))
def average_clustering(graph: nx.Graph, approximate=True):
    # only for undirected graphs
    if approximate:
        return approximation.average_clustering(graph)
    return cluster.average_clustering(graph)
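# Usage sketch for the wrapper above: compare the sampled estimate with the
# exact coefficient on a small graph. The module aliases assume imports of
# the form `from networkx.algorithms import approximation, cluster`, matching
# the names used in the wrapper.
import networkx as nx
from networkx.algorithms import approximation, cluster

G = nx.karate_club_graph()
print(average_clustering(G, approximate=True))   # Monte Carlo estimate
print(average_clustering(G, approximate=False))  # exact, ~0.5706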
def ver_medidas(G):
    print(function.info(G))

    # Minimum number of nodes that must be removed to disconnect G.
    print("Minimum number of nodes that must be removed to disconnect G: " + str(approximation.node_connectivity(G)))

    # Average clustering coefficient of G.
    print("average clustering coefficient of G: " + str(approximation.average_clustering(G)))

    # Density of the graph.
    print("Density of G: " + str(function.density(G)))

    # Assortativity measures the similarity of connections in the graph with
    # respect to the node degree. Positive values of r indicate a correlation
    # between nodes of similar degree, while negative values indicate
    # correlations between nodes of different degree.
    print("degree assortativity: " + str(assortativity.degree_assortativity_coefficient(G)))

    # Assortativity measures the similarity of connections in the graph with
    # respect to the given attribute.
    print("assortativity for node attributes: " + str(assortativity.attribute_assortativity_coefficient(G, "crime")))

    # Average neighbor degree.
    plt.plot(list(assortativity.average_neighbor_degree(G).values()))
    plt.title("Average neighbor degree")
    plt.xlabel("Node")
    plt.ylabel("Degree")
    plt.show()

    # Degree centrality of each node.
    plt.plot(list(centrality.degree_centrality(G).values()))
    plt.title("Degree centrality")
    plt.xlabel("Node")
    plt.ylabel("Centrality")
    plt.show()

    # Clustering coefficient of each node.
    plt.plot(list(cluster.clustering(G).values()))
    plt.title("Clustering coefficient")
    plt.xlabel("Node")
    plt.show()

    # Mean clustering coefficient.
    print("Clustering coefficient of G: " + str(cluster.average_clustering(G)))

    # Center of the graph: the subgraph induced by the set of vertices of
    # minimum eccentricity. The eccentricity of v in V is the maximum distance
    # from v to any other vertex of G along shortest paths.
    print("Center of G: " + str(distance_measures.center(G)))

    # Diameter of the graph: the maximum eccentricity.
    print("Diameter of G: " + str(distance_measures.diameter(G)))

    # Eccentricity of each node: the maximum distance from v to all other nodes in G.
    plt.plot(list(distance_measures.eccentricity(G).values()))
    plt.title("Eccentricity of each node")
    plt.xlabel("Node")
    plt.show()

    # Periphery: the set of nodes with eccentricity equal to the diameter.
    print("Periphery of G:")
    print(distance_measures.periphery(G))

    # Radius: the minimum eccentricity.
    print("Radius of G: " + str(distance_measures.radius(G)))

    # PageRank computes a ranking of the nodes in G based on the structure of
    # incoming links. It was originally designed as an algorithm to rank web pages.
    plt.plot(list(link_analysis.pagerank_alg.pagerank(G).values()))
    plt.title("Score of each node")
    plt.xlabel("Node")
    plt.show()

    # Small-world coefficient: a graph is commonly classified as small-world
    # if sigma > 1.
    print("Small-world coefficient: " + str(smallworld.sigma(G)))

    # The small-world coefficient omega ranges between -1 and 1. Values close
    # to 0 mean G has small-world characteristics. Values close to -1 mean G
    # has a lattice shape, whereas values close to 1 mean G is a random graph.
    print("Omega coefficient: " + str(smallworld.omega(G)))
edgecnt = G.number_of_edges()
print('{} has {} nodes in its network.'.format(that_year, nodecnt))
print('{} has {} edges in its network.'.format(that_year, edgecnt))

### GRAPH DENSITY ###
den = nx.density(G)
print('{} has a density of {}.'.format(that_year, round(den, 5)))

### DEGREE ASSORTATIVITY ###
deg_assort = nx.degree_assortativity_coefficient(G)
print('{} has a degree assortativity of {}.'.format(that_year, round(deg_assort, 5)))

### APPROXIMATE AVERAGE CLUSTERING COEFFICIENT ###
avg_clust = approx.average_clustering(G, trials=10000)
print('{} has an average clustering of {}.'.format(that_year, avg_clust))

### DEGREE OF FRAGMENTATION--BORGATTI'S KEY PLAYER PROBLEM ###
denominator = nodecnt * (nodecnt - 1)
sum_of_distance = 0.0
distance_matrix = nx.all_pairs_shortest_path_length(G)
for source, destinations in distance_matrix:
    for destination, length in destinations.items():
        if not source == destination:
            sum_of_distance += 1 / length
degree_of_fragmentation = (2 * sum_of_distance) / denominator
print('{} has a degree of fragmentation of {}.'.format(that_year, degree_of_fragmentation))

### NUMBER OF CLIQUES ###
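# For reference, Borgatti's distance-weighted fragmentation for the key
# player problem is usually written as
#
#     F = 1 - ( sum over i != j of 1/d_ij ) / ( n * (n - 1) )
#
# where d_ij is the shortest-path distance, the sum runs over ordered pairs,
# and unreachable pairs contribute 0 (all_pairs_shortest_path_length simply
# omits them). A sketch of that textbook form, offered as a cross-check
# rather than as the snippet's original intent:
import networkx as nx

def distance_weighted_fragmentation(G):
    n = G.number_of_nodes()
    cohesion = sum(
        1 / length
        for _, destinations in nx.all_pairs_shortest_path_length(G)
        for destination, length in destinations.items()
        if length > 0)
    return 1 - cohesion / (n * (n - 1))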
    return lengths  # tail of sample_path_lengths (defined above this excerpt)

def estimate_path_length(G, nodes=None, trials=1000):
    return np.mean(sample_path_lengths(G, nodes, trials))

def read_graph(filename):
    G = nx.Graph()
    array = np.loadtxt(filename, dtype=int)
    G.add_edges_from(array)
    return G

# https://snap.stanford.edu/data/facebook_combined.txt.gz
fb = read_graph('../../data/facebook_combined.txt.gz')
n, m = len(fb), len(fb.edges())  # (4039, 88234)
C = average_clustering(fb)
L = estimate_path_length(fb)
k = int(round(2 * m / n))  # 44

lattice = nx.watts_strogatz_graph(n, k, p=0)
len(lattice), len(lattice.edges())  # (4039, 88858)
C, average_clustering(lattice)  # (0.615, 0.747)
L, estimate_path_length(lattice)  # (3.717, 47.088)
def test_complete():
    G = nx.complete_graph(5)
    assert_equal(average_clustering(G, trials=int(len(G) / 2)), 1)
    G = nx.complete_graph(7)
    assert_equal(average_clustering(G, trials=int(len(G) / 2)), 1)
def test_petersen():
    # Actual coefficient is 0
    G = nx.petersen_graph()
    assert_equal(average_clustering(G, trials=int(len(G) / 2)),
                 nx.average_clustering(G))
def test_empty():
    G = nx.empty_graph(5)
    assert_equal(average_clustering(G, trials=int(len(G) / 2)), 0)
def test_petersen():
    # Actual coefficient is 0
    G = nx.petersen_graph()
    assert (average_clustering(G, trials=int(len(G) / 2))
            == nx.average_clustering(G))
annot = sep + "Metrics added {} from {}.py".format(now, this_file) + \
    "\n\n" + sep

with open("network_metrics.txt", 'a') as metrics_file:
    metrics_file.write(annot)
    metrics_file.write(nx.info(latinx_g) + "\n\n")
    metrics_file.write(nx.info(todes_g) + "\n\n")

# ---------------------------------------------------------------------------- #
# AVG CLUSTER COEFFICIENT
# ---------------------------------------------------------------------------- #
logging.info("calculating cluster coeff for each network")
todes_gu = nx.to_undirected(todes_g)
latinx_gu = nx.to_undirected(latinx_g)
t_cluster_coeff = appx.average_clustering(todes_gu, trials=10000, seed=115)
l_cluster_coeff = appx.average_clustering(latinx_gu, trials=10000, seed=115)

# write each cluster coefficient
with open("network_metrics.txt", 'a') as metrics_file:
    metrics_file.write("Latinx network average cluster coeff: {} \n\n".format(l_cluster_coeff))
    metrics_file.write("Todes network average cluster coeff: {} \n\n".format(t_cluster_coeff))

# ---------------------------------------------------------------------------- #
# NETWORK DENSITY
# ---------------------------------------------------------------------------- #
logging.info("calculating network density")
with open("network_metrics.txt", 'a') as metrics_file:
    metrics_file.write("Latinx Density: {}\n\n".format(nx.density(latinx_g)))
def test_tetrahedral():
    # Actual coefficient is 1
    G = nx.tetrahedral_graph()
    assert average_clustering(G, trials=int(len(G) / 2)) == nx.average_clustering(G)
print("Instances topics\n") for key, value in instance2topics.items(): print(key,value) """ Assortativity """ r=nx.degree_pearson_correlation_coefficient(mastodon_digraph) print(r) """ Network Analysis Undirected """ mastodon_undirected = mastodon_digraph.to_undirected() average_node_degree = nx.average_degree_connectivity(mastodon_undirected) print ("average node degree", average_node_degree.keys()) average_clustering = approx.average_clustering(mastodon_undirected) print("Average Clustering: ", average_clustering) node_connectivity = approx.node_connectivity(mastodon_undirected) print("Node connectivity: ", node_connectivity) """ Max degree node + Ego Network """ #find node with largest degree node_and_degree = mastodon_digraph.degree() (largest_hub, degree) = sorted(node_and_degree, key=itemgetter(1))[-1] # Create ego graph of main hub hub_ego = nx.ego_graph(mastodon_digraph, largest_hub) # Draw graph pos = nx.spring_layout(hub_ego)
# exact, slow
# print('node connectivity:', node_connectivity(G.to_undirected()))
# print('edge connectivity:', edge_connectivity(G.to_undirected()))

# print('------------DAG-------------', file=f)
print('Is DAG (should be true):\t', is_directed_acyclic_graph(G), file=f)
print('Longest path length:\t', dag_longest_path_length(G), file=f)

# print('--------Clustering--------')
# exact, slow
# print('Average clustering coefficient (undirected):', average_clustering(G.to_undirected()))
# approximate, fast
print('Average clustering coefficient (undirected, approx.):\t',
      approx.average_clustering(G.to_undirected()), file=f)

# reprint in the file, tab-separated
print("\n\n\n\n", file=f)

# ---------Nodes & Edges---------
print(G.number_of_nodes(), end="\t", file=f)
print(len([k for k in list(nx.weakly_connected_components(G)) if (len(k) == 1)]),
      end="\t", file=f)
print(percentage, end="\t", file=f)
print(G.number_of_edges(), end="\t", file=f)

# --------Connectivity--------
net = nx.Graph(nx.read_pajek(path))
d = dict()
d['Graph'] = g
d['n_nodes'] = net.number_of_nodes()
d['n_edges'] = net.number_of_edges()
node_list = list(net.nodes())
degrees = [x[1] for x in net.degree(node_list)]
average_dg = np.average(degrees)
max_dg = np.max(degrees)
min_dg = np.min(degrees)
d['min_degree'] = round(min_dg, 4)
d['max_degree'] = round(max_dg, 4)
d['av_degree'] = round(average_dg, 4)
# print('Maximum degree: ', max_dg, ' Minimum degree: ', min_dg, ' Average degree: ', average_dg)
d['clustering_cof'] = round(approximation.average_clustering(net), 4)
d['assortativity'] = round(degree_assortativity_coefficient(net), 4)
d['av_path_length'] = round(average_shortest_path_length(net), 4)
d['diameter'] = round(diameter(net), 4)
# DataFrame.append was removed in pandas 2.0; concat a one-row frame instead.
df = pd.concat([df, pd.DataFrame([d])], ignore_index=True)
df.to_csv('descriptors.csv', index=False)
def test_petersen_seed():
    # Actual coefficient is 0
    G = nx.petersen_graph()
    assert_equal(average_clustering(G, trials=int(len(G) / 2), seed=1),
                 nx.average_clustering(G))