from networkx.algorithms.cluster import clustering


def calc_cluster_coefficient(hcc, V):
    # check max weight first (random groups may have a max of 0)
    max_w = max(w for u, v, w in hcc.edges(data='weight'))
    if max_w == 0:
        return 0
    # average weighted clustering over the node set V (equivalent to average_clustering(hcc))
    return sum(clustering(hcc, weight='weight').values()) / float(len(V))
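# Hedged usage sketch (my addition, not from the original source): it assumes
# `hcc` is a weighted networkx Graph and `V` is the node collection the
# average is taken over.
import networkx as nx

demo_graph = nx.Graph()
demo_graph.add_weighted_edges_from([(0, 1, 0.5), (1, 2, 0.8), (0, 2, 0.3), (2, 3, 0.4)])
print(calc_cluster_coefficient(demo_graph, list(demo_graph.nodes)))  # weighted clustering averaged over all nodes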
def get_topological_features(G, nodes=None):
    # get_features is a helper defined elsewhere in the original module
    N_ = len(G.nodes)
    if nodes is None:
        nodes = G.nodes
    # Degree centrality
    d_c = get_features(degree_centrality(G).values())
    print('a')
    # Betweenness centrality
    b_c = get_features(betweenness_centrality(G).values())
    print('b')
    # Closeness centrality
    c_c = get_features(closeness_centrality(G).values())
    print('c')
    # Clustering
    c = get_features(clustering(G).values())
    print('d')
    d = diameter(G)
    r = radius(G)
    # average shortest-path length per source node
    s_p_average = []
    for s in shortest_path_length(G):
        dic = s[1]
        lengths = dic.values()
        s_p_average += [sum(lengths) / float(N_)]
    s_p_average = get_features(s_p_average)
    features = np.concatenate((d_c, b_c, c_c, c, s_p_average, [d], [r]), axis=0)
    return features
def clustering_coefficient_and_path_length(G):
    # mean (absolute) local clustering coefficient over all nodes
    clustering_values = clustering(G)
    clustering_values = np.mean(
        [abs(value) for value in clustering_values.values()])
    # mean of the average shortest path length, taken per connected component
    path_length = []
    for C in (G.subgraph(c).copy() for c in nx.connected_components(G)):
        path_length.append(average_shortest_path_length(C))
    path_length = np.mean(path_length)
    return clustering_values, path_length
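# Illustrative call (my own sketch, not from the original file): the function
# above assumes `clustering`, `average_shortest_path_length`, `nx`, and `np`
# are already imported at module scope; with those imports in place it can be
# exercised on a Watts-Strogatz graph.
import networkx as nx
import numpy as np
from networkx.algorithms.cluster import clustering
from networkx.algorithms.shortest_paths.generic import average_shortest_path_length

ws = nx.watts_strogatz_graph(n=200, k=6, p=0.1, seed=42)
mean_cc, mean_path = clustering_coefficient_and_path_length(ws)
print("mean clustering: %.3f, mean path length: %.3f" % (mean_cc, mean_path))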
def attack_degree(graph, nodes_number, iteration_removals):
    after_removals = []
    length_range = int((nodes_number - iteration_removals) / iteration_removals)
    # rank nodes by degree, highest first
    degrees = list(graph.degree())
    degrees.sort(key=lambda x: x[1], reverse=True)
    # first_tuple_element is a helper from the original module (extracts the node ids)
    first_element = first_tuple_element(degrees)
    degrees_reshaped = np.reshape(
        first_element, (iteration_removals, int(nodes_number / iteration_removals))).T
    for iterator in tqdm(range(length_range)):
        # remove a batch of iteration_removals nodes (taken from the degree-ranked ids) from the network
        graph.remove_nodes_from(degrees_reshaped[iterator])
        after_removals.append(
            np.mean(list(nx_cluster.clustering(graph).values())))
    return after_removals
def clustering_coefficient(self, use_undirected=False):
    if use_undirected:
        g = self.g.to_undirected()
    else:
        g = self.g
    if not self.department_clusters:
        self.extract_departments()
    res = []
    for department, cluster in self.department_clusters.items():
        cfs = clustering(g, cluster)
        res.append((department, sum(cfs.values()) / len(cfs)))
    res = sorted(res, key=lambda x: x[1], reverse=True)[:self.top_k]
    return res
def __graph_props(self):
    if self.__data_dict["Connected"]:
        self.__data_dict['Diameter'] = nx.diameter(self.__g)
        self.__data_dict['Radius'] = nx.radius(self.__g)
        self.__data_dict["Centered nodes"] = list(nx.center(self.__g))
    else:
        self.__data_dict['Diameter'] = None
        self.__data_dict['Radius'] = None
        self.__data_dict["Centered nodes"] = []
    if self.__data_dict["Type"] == "Undirected":
        self.__data_dict['Average Clustering'] = nx.average_clustering(self.__g)
        self.__clustering_coefficients = cluster.clustering(self.__g)
    else:
        self.__data_dict['Average Clustering'] = None
def random_attack(graph, nodes_number, iteration_removals):
    # list holding the network's clustering values after each removal step
    after_removals = []
    # number_nodes_largest_cc = []  # number of nodes in the largest component of the network
    length_range = int((nodes_number - iteration_removals) / iteration_removals)
    for _ in tqdm(range(length_range)):
        # returns a set with the nodes of the largest connected component of the network
        largest_cc = max(networkx.connected_components(graph), key=len)
        # if the largest connected component is smaller than the number of nodes to remove, exit the loop
        if len(largest_cc) < iteration_removals:
            break
        # number_nodes_largest_cc.append(len(largest_cc))
        # remove iteration_removals nodes at random from the largest connected component
        # (random.sample needs a sequence, hence the list() around the set)
        graph.remove_nodes_from(random.sample(list(largest_cc), iteration_removals))
        after_removals.append(
            np.mean(list(nx_cluster.clustering(graph).values())))
    return after_removals
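# Hedged driver sketch (my addition): exercises random_attack on an Erdos-Renyi
# graph. The function above expects `networkx`, `nx_cluster`, `random`, `np`,
# and `tqdm` at module scope, supplied here; note that it mutates the graph it
# is given.
import random
import networkx
import networkx.algorithms.cluster as nx_cluster
import numpy as np
from tqdm import tqdm

demo_er = networkx.erdos_renyi_graph(500, 0.02, seed=7)
curve = random_attack(demo_er, nodes_number=demo_er.number_of_nodes(), iteration_removals=25)
print(curve[:5])  # clustering after the first few removal steps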
def nx_average_clustering_per_k(g):
    # average clustering coefficient per degree k, i.e. C(k)
    coefficients = [None] * g.number_of_nodes()
    for k in range(len(coefficients)):
        coefficients[k] = [0, 0]
    clustering_coefficient = clustering(g)  # dict of (vertex, cc)
    all_degrees = g.degree()  # list of (vertex, degree)
    for deg in all_degrees:
        coefficients[deg[1]][0] += 1
        coefficients[deg[1]][1] += clustering_coefficient[deg[0]]
    # average cc
    ck = []
    for coef in coefficients:
        if coef[0] == 0:
            ck.append(0)
        else:
            ck.append(coef[1] / coef[0])
    return ck
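# Hedged usage sketch (my addition): average clustering per degree, C(k), for a
# Barabasi-Albert graph; it assumes `clustering` in the function above refers to
# networkx.algorithms.cluster.clustering.
import networkx as nx
from networkx.algorithms.cluster import clustering

ba = nx.barabasi_albert_graph(1000, 3, seed=1)
ck = nx_average_clustering_per_k(ba)
for k, value in enumerate(ck):
    if value > 0:
        print(k, round(value, 4))  # only degrees that actually occur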
def clustering_coefficient(g, department_clusters=None, use_undirected=False, k=20):
    if use_undirected:
        g = g.to_undirected()
    if not department_clusters:
        department_clusters = extract_departments(g)
    res = []
    for department, cluster in department_clusters.items():
        cfs = clustering(g, cluster)
        res.append((department, sum(cfs.values()) / len(cfs)))
    res = sorted(res, key=lambda x: x[1], reverse=True)[:k]
    return res
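# Possible invocation (my own sketch): passing department_clusters explicitly so
# the extract_departments helper from the original module is not needed; the
# department-to-nodes mapping below is made up for illustration.
import networkx as nx
from networkx.algorithms.cluster import clustering

karate = nx.karate_club_graph()
departments = {"A": [0, 1, 2, 3, 4], "B": [30, 31, 32, 33]}
print(clustering_coefficient(karate, department_clusters=departments, k=2))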
def graph_stats(G):
    """
    Compute all the graph-related statistics in the features.

    Note that since the graph is always fully connected, all of these are the
    weighted versions. For this reason, many of these functions use the
    implementations in bctpy rather than NetworkX.
    """
    # Local measures
    clustering_dict = clustering(G, weight='weight')
    adjacency = np.array(adjacency_matrix(G).todense())
    betweenness_centrality_dict = betweenness_centrality(G, weight='weight')
    paths = shortest_path_length(G, weight='weight')
    eccentricities = [max(dists.values()) for (source, dists) in sorted(paths)]
    local_measures = np.concatenate(
        [[v for (k, v) in sorted(clustering_dict.items())],
         [v for (k, v) in sorted(betweenness_centrality_dict.items())],
         eccentricities])
    graph_diameter = max(eccentricities)
    graph_radius = min(eccentricities)
    aspl = average_shortest_path_length(G, weight='weight')
    global_measures = np.array([graph_diameter, graph_radius, aspl])
    return np.concatenate([local_measures, global_measures])
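# Sketch of a possible call (my addition, not from the original file): a small
# complete graph with random edge weights, matching the docstring's
# fully-connected assumption. The imports are the standard networkx/numpy names
# the function body expects.
import numpy as np
import networkx as nx
from networkx.algorithms.cluster import clustering
from networkx.linalg.graphmatrix import adjacency_matrix
from networkx.algorithms.centrality import betweenness_centrality
from networkx.algorithms.shortest_paths.generic import (
    shortest_path_length, average_shortest_path_length)

rng = np.random.default_rng(0)
G_demo = nx.complete_graph(10)
for u, v in G_demo.edges:
    G_demo.edges[u, v]['weight'] = float(rng.uniform(0.1, 1.0))
features = graph_stats(G_demo)
print(features.shape)  # 3 local measures per node plus 3 global measures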
def getNodeClusteringCoefficient(
        self, node: V) -> Union[Any, int, float, dict[Any, Union[Any, int]]]:
    """Return the local clustering coefficient of `node` in the wrapped graph."""
    return clustering(self.graph, node)
G.add_edge(edge, node_id)  # last statement of the edge-building loop in the original script

pG = projected_graph(G, people)
distances_to_furthest_nodes = dict()
for source, targets_to_paths in shortest_path(pG).items():
    longest_shortest_path_length = 0
    for target, path in targets_to_paths.items():
        longest_shortest_path_length = max(
            [longest_shortest_path_length, len(path)])
    distances_to_furthest_nodes[source] = longest_shortest_path_length
nodes['distanceToFurthestNode'] = pandas.Series(distances_to_furthest_nodes, index=nodes.index)
nodes['clusteringCoefficient'] = pandas.Series(clustering(pG), index=nodes.index)
nodes['betweennessCentrality'] = pandas.Series(betweenness_centrality(pG), index=nodes.index)
nodes['degree'] = pandas.Series(dict(pG.degree()), index=nodes.index)
print("vertices: {}".format(pG.number_of_nodes()))
print("edges: {}".format(pG.number_of_edges()))
print(nodes.nlargest(
    5, 'clusteringCoefficient')[['clusteringCoefficient', 'name']])
print(nodes.nlargest(
    5, 'betweennessCentrality')[['betweennessCentrality', 'name']])
print(nodes.nlargest(
    5, 'distanceToFurthestNode')[['distanceToFurthestNode', 'name']])
print(nodes.nlargest(5, 'degree')[['degree', 'name']])
def ver_medidas(G):
    print(function.info(G))
    """
    Minimum number of nodes that must be removed to disconnect G
    """
    print("Minimum number of nodes that must be removed to disconnect G: " + str(approximation.node_connectivity(G)))
    """
    Average clustering coefficient of G.
    """
    print("average clustering coefficient of G: " + str(approximation.average_clustering(G)))
    """
    Density of a graph
    """
    print("Density of G: " + str(function.density(G)))
    """
    Assortativity measures the similarity of connections in the graph with
    respect to the node degree. Positive values of r indicate a correlation
    between nodes of similar degree, while a negative value indicates
    correlations between nodes of different degree.
    """
    print("degree assortativity: " + str(assortativity.degree_assortativity_coefficient(G)))
    """
    Assortativity measures the similarity of connections in the graph with respect to the given attribute.
    """
    print("assortativity for node attributes: " + str(assortativity.attribute_assortativity_coefficient(G, "crime")))
    """
    Average neighbor degree
    """
    plt.plot(list(assortativity.average_neighbor_degree(G).values()))
    plt.title("Average neighbor degree")
    plt.xlabel("Node")
    plt.ylabel("Degree")
    plt.show()
    """
    Degree centrality of each node
    """
    plt.plot(list(centrality.degree_centrality(G).values()))
    plt.title("Degree centrality")
    plt.xlabel("Node")
    plt.ylabel("Centrality")
    plt.show()
    """
    Clustering coefficient of each node
    """
    plt.plot(list(cluster.clustering(G).values()))
    plt.title("Clustering coefficient")
    plt.xlabel("Node")
    plt.show()
    """
    Average clustering coefficient
    """
    print("Clustering coefficient of G: " + str(cluster.average_clustering(G)))
    """
    Center of the graph
    The center of a graph G is the subgraph induced by the set of vertices of
    minimum eccentricity. The eccentricity of v in V is defined as the maximum
    distance from v to any other vertex of the graph G along shortest paths.
    """
    print("Center of G: " + str(distance_measures.center(G)))
    """
    Diameter of a graph
    The diameter is the maximum eccentricity.
    """
    print("Diameter of G: " + str(distance_measures.diameter(G)))
    """
    Eccentricity of each node
    The eccentricity of a node v is the maximum distance from v to all other nodes in G.
    """
    plt.plot(list(distance_measures.eccentricity(G).values()))
    plt.title("Eccentricity of each node")
    plt.xlabel("Node")
    plt.show()
    """
    Periphery
    The periphery is the set of nodes with eccentricity equal to the diameter.
    """
    print("Periphery of G:")
    print(distance_measures.periphery(G))
    """
    Radius
    The radius is the minimum eccentricity.
    """
    print("Radius of G: " + str(distance_measures.radius(G)))
    """
    PageRank computes a ranking of the nodes in the graph G based on the
    structure of the incoming links. It was originally designed as an
    algorithm to rank web pages.
    """
    plt.plot(list(link_analysis.pagerank_alg.pagerank(G).values()))
    plt.title("Score of each node")
    plt.xlabel("Node")
    plt.show()
    """
    Small-world coefficient (sigma).
    A graph is commonly classified as small-world if sigma > 1.
    """
    print("Small-world coefficient: " + str(smallworld.sigma(G)))
    """
    The small-world coefficient (omega) ranges between -1 and 1. Values close
    to 0 mean G features small-world characteristics. Values close to -1 mean
    G has a lattice shape whereas values close to 1 mean G is a random graph.
    """
    print("Omega coefficient: " + str(smallworld.omega(G)))
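# Hedged reconstruction (my addition) of the imports the function above appears
# to rely on: networkx algorithm submodules referenced by their module names,
# plus matplotlib. Calling it also requires a graph whose nodes carry a "crime"
# attribute, function.info() needs networkx < 3.0, and smallworld.sigma/omega
# are expensive on larger graphs.
import matplotlib.pyplot as plt
from networkx.classes import function
from networkx.algorithms import (approximation, assortativity, centrality,
                                 cluster, distance_measures, link_analysis,
                                 smallworld)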
y = np.array(y) + 1
x = np.log(x)
y = np.log(y)
plt.scatter(x, y, s=1, color=(1, 0, 0))
plt.show()

net_file = '../net_6'
# Read graph from file
G = read_edgelist(net_file)
print("The net_6 has %d nodes and %d edges" % (G.number_of_nodes(), G.number_of_edges()))
# Clustering coefficient 0.07872
C = list(clustering(G).values())
C = sum(C) / len(C)
print("The net_6 has clustering coefficient: %f" % C)
scatterplot_degree_distribution(G)
log_scatterplot_degree_distribution(G)
d = parallel_diam(G, 5)
print("The net_6 has diameter: ", d)

###############################
## net 6 is random?
###############################
RG = randomG(10000, 0.0016)
print("The random graph has %d nodes and %d edges" % (RG.number_of_nodes(), RG.number_of_edges()))
# Clustering coefficient 0.07872
# deg_dict = OrderedDict(sorted(graph.degree(), key=lambda x: x[0]))
# deg_dict2 = OrderedDict(sorted(graph2.degree(), key=lambda x: x[0]))
# deg_dict3 = OrderedDict(sorted(graph3.degree(), key=lambda x: x[0]))
# deg_dict_avg = [(list(deg_dict.values())[x] + list(deg_dict2.values())[x] + list(deg_dict3.values())[x])/3 for x in range(len(deg_dict.values()))]
#
# # Assortativity Plot
# plt.scatter(deg_dict_avg, neighbor_deg_avg, marker=".")
# plt.xlabel('Degree')
# plt.ylabel('Average neighbour degree')
# plt.show()
#
# # Pearson Correlation of Plot:
# print(numpy.corrcoef(list(deg_dict_avg), list(neighbor_deg_avg))[0, 1])

# Clustering Data
data = OrderedDict(sorted((cluster.clustering(graph)).items()))
data2 = OrderedDict(sorted((cluster.clustering(graph2)).items()))
data3 = OrderedDict(sorted((cluster.clustering(graph3)).items()))

# Clustering Calculation
values = [(list(data.values())[x] + list(data2.values())[x] + list(data3.values())[x])/3
          for x in range(len(data.values()))]
values.sort()
keys = data.keys()

# Clustering Plot
plt.scatter(values, range(len(keys)), marker=".")
plt.ylabel('Cumulative frequency of Nodes')
plt.xlabel('Local Clustering Coefficient Value of Nodes')

# Twitter Annotation:
plt.annotate('Median value at 0.4 and 500', xytext=(0.45, 500), xy=(0.402, 500),
             arrowprops={'facecolor': 'red'})
# # Google+ Annotation:
def avgClustering(self, W):
    from networkx.algorithms.cluster import clustering
    g = buildGraph(W, weighted=self.weighted)
    cl = clustering(g, weight='weight' if self.weighted else None)
    return centerMeasure(cl.values(), self.center)
def find_topics_topicness(g: nx.DiGraph, visualize=False) -> np.ndarray:
    """
    Computes a keyword-topic matrix containing membership information of
    keywords w.r.t. topics.

    Extended Summary
    ----------------
    The computation of this matrix uses an iterative algorithm, which picks the
    best nodes using a 'topicness' metric derived from pagerank, the local
    clustering coefficient, and betweenness centrality.

    Parameters
    ----------
    g : nx.DiGraph
        keyword co-occurrence graph

    Returns
    -------
    ndarray
        keyword-topic matrix, containing the information about keywords and
        topics. Specifically, the entry [ui, ti] contains 1 if keyword ui
        belongs to topic ti.
    """
    n = g.number_of_nodes()
    m = g.number_of_edges()

    # get betweenness centrality
    betweenness = misc.dictionary_to_numpy(
        nx.betweenness_centrality(g.to_undirected(as_view=True)))

    # get personalized-pagerank using cluster coefficient
    c = cluster.clustering(g, weight="weight")
    z = sum(c.values())
    if z != 0:
        v = {ui: c[ui] / z for ui in range(n)}
        pagerank = misc.dictionary_to_numpy(
            nx.pagerank_numpy(g, personalization=v))
    else:
        pagerank = misc.dictionary_to_numpy(nx.pagerank_numpy(g))

    # compute topicness --------------------------
    pagerank: np.ndarray = pagerank / pagerank.max()  # normalize between [0,1]
    betweenness = betweenness / betweenness.max() if betweenness.max() != 0 else betweenness  # normalize between [0,1]
    topicness: np.ndarray = pagerank * (1 - betweenness)

    # normalize topicness between [0,1]
    topicness = topicness - topicness.min()
    topicness = topicness / (topicness.max())

    # normalize the edge weights
    influence.normalize_weights(g, mode="mixed")

    # compute area of influence for each node
    node_influence = np.zeros([n, n], dtype=float)
    for u in tqdm.trange(n):
        node_influence[u, :] = influence.linear_threshold_mean(g, {u}, 15)

    # estimate topics
    topics_list, surface = list(), np.array(topicness)
    # for i in tqdm.trange(k):
    while (surface > 0).any():
        # compute topic influence
        fuzzy_topic = node_influence[surface.argmax(), :]
        crisp_topic = defuzzification(fuzzy_topic)
        # add topic to extracted topics
        topics_list.append(crisp_topic)
        # update surface
        surface = surface * (1 - fuzzy_topic)

    # fill node-topic matrix
    topic_number = len(topics_list)
    T = np.zeros([n, topic_number], dtype=float)  # node-topic matrix
    for i in range(topic_number):
        T[:, i] = topics_list[i]

    # visualize various information about the estimated topics
    if visualize:
        coverage = T.sum(axis=1)
        sources = np.zeros([n])
        sources[T.argmax(axis=0)] = 1
        visualization.plot_edge_weights(g)
        visualization.show_graphfunction(g, topicness, with_labels=False)
        visualization.show_graphfunction(g, sources, with_labels=False)
        visualization.show_graphfunction(g, coverage, with_labels=False)

    return T
# coding: utf-8
# Interactive session: comparing the NetworkX average local clustering
# coefficient with igraph's local transitivity on Waxman graphs.
import networkx
import igraph
graph_nx = networkx.waxman_graph(20)
graph_ig = igraph.Graph(len(graph_nx), list(zip(*list(zip(*networkx.to_edgelist(graph_nx)))[:2])))
import numpy as np
import networkx.algorithms.cluster as nx_cluster
cluster_nx = np.mean(list(nx_cluster.clustering(graph_nx).values()))
cluster_nx
nx_cluster.clustering(graph_nx).values()
graph_nx = networkx.waxman_graph(100)
nx_cluster.clustering(graph_nx).values()
cluster_nx = np.mean(list(nx_cluster.clustering(graph_nx).values()))
cluster_nx
graph_ig = igraph.Graph(len(graph_nx), list(zip(*list(zip(*networkx.to_edgelist(graph_nx)))[:2])))
graph_ig.transitivity_avglocal_undirected()
graph_ig.transitivity_undirected()
graph_ig.transitivity_local_undirected()
cluster_ig = np.mean(graph_ig.transitivity_local_undirected())
cluster_ig
np.mean(graph_ig.transitivity_local_undirected())
graph_ig.transitivity_local_undirected()
cluster_ig = graph_ig.transitivity_local_undirected()
cluster_ig
np.nan_to_num(cluster_ig)
cluster_ig
# replace NaN transitivities (degree < 2 nodes) with 0 before averaging
cluster_ig = np.nan_to_num(cluster_ig)
cluster_ig
np.mean(cluster_ig)
nx_cluster.transitivity(graph_nx)