def assortativity(orig_g_M, otherModel_M, name):
    # Print (degree, average-neighbor-degree) pairs for the original graphs and
    # for the graphs produced by the other model, subsampled to roughly 75 points.
    if len(orig_g_M) != 0:
        dorig = pd.DataFrame()
        for g in orig_g_M:
            kcdf = pd.DataFrame(list(nx.average_neighbor_degree(g).items()))
            kcdf['k'] = [d for _, d in g.degree()]
            dorig = pd.concat([dorig, kcdf])
        print("orig")
        gb = dorig.groupby(['k'])
        zz = len(gb[1].mean().values)
        sa = int(math.ceil(zz / 75.0))
        if sa == 0:
            sa = 1
        for x in range(0, len(gb[1].mean().values), sa):
            print("(" + str(gb.mean().index[x]) + ", " + str(gb[1].mean().values[x]) + ")")
    if len(otherModel_M) != 0:
        dorig = pd.DataFrame()
        for g in otherModel_M:
            kcdf = pd.DataFrame(list(nx.average_neighbor_degree(g).items()))
            kcdf['k'] = [d for _, d in g.degree()]
            dorig = pd.concat([dorig, kcdf])
        print("the other model ", name)
        gb = dorig.groupby(['k'])
        zz = len(gb[1].mean().values)
        sa = int(math.ceil(zz / 75.0))
        if sa == 0:
            sa = 1
        for x in range(0, len(gb[1].mean().values), sa):
            print("(" + str(gb.mean().index[x]) + ", " + str(gb[1].mean().values[x]) + ")")
    return
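# A minimal driver for assortativity() above (a sketch: the two example graphs
# and the "BA" label are placeholders chosen here, not part of the original
# code). It prints (k, knn) pairs for one real graph and one same-size model
# graph, suitable for pasting into a plotting tool.
import math
import networkx as nx
import pandas as pd

orig = [nx.karate_club_graph()]
model = [nx.barabasi_albert_graph(34, 2, seed=1)]
assortativity(orig, model, "BA")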
def test_degree_p4_weighted(self):
    G = nx.path_graph(4)
    G[1][2]['weight'] = 4
    answer = {0: 2, 1: 1.8, 2: 1.8, 3: 2}
    nd = nx.average_neighbor_degree(G, weight='weight')
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, weight='weight')
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, weight='weight')
    assert_equal(nd, answer)
    nd = nx.average_neighbor_degree(D, source='out', target='out', weight='weight')
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, source='in', target='in', weight='weight')
    assert_equal(nd, answer)
def test_degree_p4_weighted(self):
    G = nx.path_graph(4)
    G[1][2]["weight"] = 4
    answer = {0: 2, 1: 1.8, 2: 1.8, 3: 2}
    nd = nx.average_neighbor_degree(G, weight="weight")
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, weight="weight")
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, weight="weight")
    assert nd == answer
    nd = nx.average_neighbor_degree(D, source="out", target="out", weight="weight")
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, source="in", target="in", weight="weight")
    assert nd == answer
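# Hand-check of the weighted answer used in the two tests above (a sketch, not
# part of the test suite): in the path 0-1-2-3 with weight(1, 2) = 4, node 1
# has neighbors 0 (degree 1) and 2 (degree 2), so its weighted average
# neighbor degree is (1*1 + 4*2) / (1 + 4) = 9/5 = 1.8, matching answer[1];
# node 0's only neighbor is 1 (degree 2), giving answer[0] = 2.
w10, w12 = 1, 4          # edge weights incident to node 1 (missing weight defaults to 1)
deg0, deg2 = 1, 2        # degrees of node 1's neighbors
assert (w10 * deg0 + w12 * deg2) / (w10 + w12) == 1.8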
def test_degree_barrat(self):
    G = nx.star_graph(5)
    G.add_edges_from([(5, 6), (5, 7), (5, 8), (5, 9)])
    G[0][5]["weight"] = 5
    nd = nx.average_neighbor_degree(G)[5]
    assert nd == 1.8
    nd = nx.average_neighbor_degree(G, weight="weight")[5]
    assert nd == pytest.approx(3.222222, abs=1e-5)
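# Hand-check of the Barrat-style example above (a sketch): node 5 is adjacent
# to the hub 0 (degree 5) and to the leaves 6-9 (degree 1 each). Unweighted:
# (5 + 1 + 1 + 1 + 1) / 5 = 1.8. With weight(0, 5) = 5 and all other weights
# defaulting to 1, node 5's strength is 5 + 4 = 9, so the weighted value is
# (5*5 + 1*1*4) / 9 = 29/9 = 3.2222..., matching the approx assertion.
assert (5 + 1 + 1 + 1 + 1) / 5 == 1.8
assert abs((5 * 5 + 4) / 9 - 3.222222) < 1e-5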
def test_degree_barrat(self):
    G = nx.star_graph(5)
    G.add_edges_from([(5, 6), (5, 7), (5, 8), (5, 9)])
    G[0][5]['weight'] = 5
    nd = nx.average_neighbor_degree(G)[5]
    assert_equal(nd, 1.8)
    nd = nx.average_neighbor_degree(G, weight='weight')[5]
    assert_almost_equal(nd, 3.222222, places=5)
def test_degree_barrat(self):
    G = nx.star_graph(5)
    G.add_edges_from([(5, 6), (5, 7), (5, 8), (5, 9)])
    G[0][5]['weight'] = 5
    nd = nx.average_neighbor_degree(G)[5]
    assert_equal(nd, 1.8)
    nd = nx.average_neighbor_degree(G, weight='weight')[5]
    assert_almost_equal(nd, 3.222222, places=5)
def graph_feature():
    # Create a directed graph
    G = nx.read_edgelist('Cit-HepTh.txt', delimiter='\t', create_using=nx.DiGraph())
    print("Nodes: ", G.number_of_nodes())
    print("Edges: ", G.number_of_edges())

    # Read training data
    train_ids = list()
    y_train = list()
    with open('train.csv', 'r') as f:
        next(f)
        for line in f:
            t = line.split(',')
            train_ids.append(t[0])
            y_train.append(t[1][:-1])

    n_train = len(train_ids)
    unique = np.unique(y_train)
    print("\nNumber of classes: ", unique.size)

    # Create the training matrix. Each row corresponds to an article.
    # Use the following 3 features for each article:
    # (1) out-degree of node
    # (2) in-degree of node
    # (3) average degree of neighborhood of node
    avg_neig_deg = nx.average_neighbor_degree(G, nodes=train_ids)
    X_train = np.zeros((n_train, 3))
    for i in range(n_train):
        X_train[i, 0] = G.out_degree(train_ids[i])
        X_train[i, 1] = G.in_degree(train_ids[i])
        X_train[i, 2] = avg_neig_deg[train_ids[i]]

    # Read test data
    test_ids = list()
    with open('test.csv', 'r') as f:
        next(f)
        for line in f:
            test_ids.append(line[:-2])

    # Create the test matrix. Use the same 3 features as above
    n_test = len(test_ids)
    avg_neig_deg = nx.average_neighbor_degree(G, nodes=test_ids)
    X_test = np.zeros((n_test, 3))
    for i in range(n_test):
        X_test[i, 0] = G.out_degree(test_ids[i])
        X_test[i, 1] = G.in_degree(test_ids[i])
        X_test[i, 2] = avg_neig_deg[test_ids[i]]

    print("\nTrain matrix dimensionality: ", X_train.shape)
    print("Test matrix dimensionality: ", X_test.shape)

    return X_train, y_train, X_test
def average_neighbor_degree_sum(self):
    # Lazily compute and cache the weighted average neighbor degrees.
    if self.average_neighbor_degree_dict is None:
        self.average_neighbor_degree_dict = nx.average_neighbor_degree(
            self.graph, weight="weight")
        time.sleep(1)
    return (self.average_neighbor_degree_dict[self.node_1]
            + self.average_neighbor_degree_dict[self.node_2])
def get_nearest_neighbor_degree(network: nx.Graph):
    """
    Calculates the average nearest neighbor degree for each node
    of the given network.

    Parameters
    ----------
    network: a NetworkX graph object

    Returns
    -------
    degrees: list-like
        an array of node degrees
    nearest_neighbor_degrees: list-like
        an array of node average nearest neighbor degrees,
        in the same order as degrees
    """
    degrees = []
    nearest_neighbor_degrees = []
    deg = dict(nx.degree(network))
    nnd = nx.average_neighbor_degree(network)
    # Iterate over nodes sorted by degree so both lists come out degree-ordered.
    for (key, item) in sorted(deg.items(), key=operator.itemgetter(1)):
        degrees.append(item)
        nearest_neighbor_degrees.append(nnd[key])
    return degrees, nearest_neighbor_degrees
def get_graphnodefeatures(g):
    for node_id, node_data in g.nodes(data=True):
        node_data["feature"] = [
            g.degree(node_id, weight="weight"),
            nx.average_neighbor_degree(g, nodes=[node_id], weight="weight")[node_id],
            1, 1, 1
        ]
def create_graph(self):
    g = nx.Graph()
    duplicated_nodes_list = self.only_nodes.iloc[:, 0].append(self.only_nodes.iloc[:, 1]).reset_index(drop=True)
    nodes_list = duplicated_nodes_list.values.tolist()
    No_duplicate_nodes = set(nodes_list)
    # print(len(No_duplicate_nodes))  # 327
    g.add_nodes_from(No_duplicate_nodes)
    g.add_edges_from(self.No_duplicate_links)
    # nx.draw(g, node_size=1.5)  # with_labels=True
    # plt.draw()
    # plt.show()
    link_density = nx.density(g)
    # print(link_density)  # 0.109
    # Note: this is the average *neighbor* degree per node, not the average degree.
    average_degree = nx.average_neighbor_degree(g)
    degree_values = [average_degree[key] for key in average_degree]
    # mean = statistics.mean(degree_values)
    # var = statistics.variance(degree_values)
    # print(var)
    degree_correlation = nx.degree_pearson_correlation_coefficient(g)
    # print(degree_correlation)  # 0.033175769936049336
    average_clustering = nx.average_clustering(g)
    # print(average_clustering)  # 0.5035048191728447
    average_hopcount = nx.average_shortest_path_length(g)
    # print(average_hopcount)  # 2.1594341569576554
    diameter = nx.diameter(g)
    # print(diameter)  # 4
    # A = nx.adjacency_matrix(g)
    A_eigenvalue = nx.adjacency_spectrum(g)
    # print(max(A_eigenvalue))  # (41.231605032525835+0j)
    G_eigenvalue = nx.laplacian_spectrum(g)
    # print(sorted(G_eigenvalue))  # 1.9300488624481513
    return g, nodes_list, No_duplicate_nodes, link_density, average_degree
def test_lattice3(self):
    G = pr.generateGraph("lattice", N=1000, dim=2)
    assert len(G.nodes()) == 961
    assert len(G.edges()) == 1860
    degrees = nx.average_neighbor_degree(G).values()
    # sum()/len() replaces the original reduce(), which needs a
    # functools import under Python 3
    ave_degree = sum(degrees) / len(degrees)
    assert int(round(ave_degree)) == 4
def draw_graph(nodes, edges, graphs_dir, default_lang='all'):
    lang_graph = nx.MultiDiGraph()
    lang_graph.add_nodes_from(nodes)
    for edge in edges:
        if edges[edge] == 0:
            lang_graph.add_edge(edge[0], edge[1])
        else:
            lang_graph.add_edge(edge[0], edge[1], weight=float(edges[edge]),
                                label=str(edges[edge]))

    # print graph info to stdout
    # degree centrality
    print('-----------------\n\n')
    print(default_lang)
    print(nx.info(lang_graph))
    try:
        # When ties are associated with some positive aspect such as friendship or collaboration,
        # indegree is often interpreted as a form of popularity, and outdegree as gregariousness.
        DC = nx.degree_centrality(lang_graph)
        max_dc = max(DC.values())
        max_dc_list = [item for item in DC.items() if item[1] == max_dc]
    except ZeroDivisionError:
        max_dc_list = []
    # https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%81%D0%BD%D1%8B%D0%B5_%D1%81%D0%B5%D1%82%D0%B8
    print('maxdc', str(max_dc_list), sep=': ')
    # assortativity coefficient
    AC = nx.degree_assortativity_coefficient(lang_graph)
    print('AC', str(AC), sep=': ')
    # connectivity
    print("weakly connected graph: ", nx.is_weakly_connected(lang_graph))
    print("number of weakly connected components: ", nx.number_weakly_connected_components(lang_graph))
    print("strongly connected graph: ", nx.is_strongly_connected(lang_graph))
    print("number of strongly connected components: ", nx.number_strongly_connected_components(lang_graph))
    print("number of attracting components: ", nx.number_attracting_components(lang_graph))
    print("node connectivity: ", nx.node_connectivity(lang_graph))
    print("edge connectivity: ", nx.edge_connectivity(lang_graph))
    # other info
    print("average degree connectivity: ", nx.average_degree_connectivity(lang_graph))
    print("average neighbor degree: ", sorted(nx.average_neighbor_degree(lang_graph).items(),
                                              key=itemgetter(1), reverse=True))
    # best for small graphs, and our graphs are pretty small
    print("pagerank: ", sorted(nx.pagerank_numpy(lang_graph).items(),
                               key=itemgetter(1), reverse=True))

    plt.figure(figsize=(16.0, 9.0), dpi=80)
    plt.axis('off')
    pos = graphviz_layout(lang_graph)
    nx.draw_networkx_edges(lang_graph, pos, alpha=0.5, arrows=True)
    nx.draw_networkx(lang_graph, pos, node_size=1000, font_size=12,
                     with_labels=True, node_color='green')
    nx.draw_networkx_edge_labels(lang_graph, pos, edges)

    # saving file to draw it with dot-graphviz
    # changing overall graph view, default is top-bottom
    lang_graph.graph['graph'] = {'rankdir': 'LR'}
    # marking nodes with maximum degree centrality in blue
    for max_dc_node in max_dc_list:
        lang_graph.node[max_dc_node[0]]['fontcolor'] = 'blue'
    write_dot(lang_graph, os.path.join(graphs_dir, default_lang + '_links.dot'))

    # plt.show()
    plt.savefig(os.path.join(graphs_dir, 'python_' + default_lang + '_graph.png'), dpi=100)
    plt.close()
def test_lattice6(self):
    G = pr.generateGraph("lattice", N=6400, dim=3)
    assert len(G.nodes()) == 5832
    assert len(G.edges()) == 16524
    degrees = nx.average_neighbor_degree(G).values()
    # sum()/len() replaces the original reduce(), which needs a
    # functools import under Python 3
    ave_degree = sum(degrees) / len(degrees)
    assert int(round(ave_degree)) == 6
def get_initial_proj_nodes(G, key):
    """
    Gets the graph and returns the nodes that we would like to have
    in the initial projection.
    """
    # a dictionary of the nodes and their degrees
    dict_degrees = dict(G.degree(G.nodes()))
    # a dictionary of the nodes and their average neighbor degrees
    dict_avg_neighbor_deg = nx.average_neighbor_degree(G)
    # sort the dictionaries into lists
    sort_degrees = sorted(dict_degrees.items(), key=lambda pw: (pw[1], pw[0]))
    sort_avg_n_d = sorted(dict_avg_neighbor_deg.items(), key=lambda pw: (pw[1], pw[0]))
    # keep only a given fraction of the nodes with the maximum degree
    top_deg = sort_degrees[int(key * len(sort_degrees)):len(sort_degrees)]
    # keep only a given fraction of the nodes with the maximum average neighbor degree
    top_avgn_deg = sort_avg_n_d[int(key * len(sort_avg_n_d)):len(sort_avg_n_d)]
    # choose the nodes that have both maximum degree and maximum average neighbor degree
    tmp_deg = top_deg
    tmp_n_deg = top_avgn_deg
    for i in range(len(top_deg)):
        tmp_deg[i] = list(tmp_deg[i])
        tmp_deg[i][1] = 5
    for i in range(len(top_avgn_deg)):
        tmp_n_deg[i] = list(tmp_n_deg[i])
        tmp_n_deg[i][1] = 10
    # the nodes with maximal degree: the nodes we want to do the projection on
    final_nodes = np.intersect1d(tmp_n_deg, tmp_deg)
    list_final_nodes = list(final_nodes)
    for i in range(len(list_final_nodes)):
        list_final_nodes[i] = str(list_final_nodes[i])
    return list_final_nodes
def analyze_graph(G):
    # centralities and node metrics
    out_degrees = G.out_degree()
    in_degrees = G.in_degree()
    betweenness = nx.betweenness_centrality(G)
    eigenvector = nx.eigenvector_centrality_numpy(G)
    closeness = nx.closeness_centrality(G)
    pagerank = nx.pagerank(G)
    avg_neighbour_degree = nx.average_neighbor_degree(G)
    redundancy = bipartite.node_redundancy(G)
    load = nx.load_centrality(G)
    # nx.hits returns a (hubs, authorities) tuple; store the hub score per node
    hits = nx.hits(G)[0]
    vitality = nx.closeness_vitality(G)

    for name in G.nodes():
        G.node[name]['out_degree'] = out_degrees[name]
        G.node[name]['in_degree'] = in_degrees[name]
        G.node[name]['betweenness'] = betweenness[name]
        G.node[name]['eigenvector'] = eigenvector[name]
        G.node[name]['closeness'] = closeness[name]
        G.node[name]['pagerank'] = pagerank[name]
        G.node[name]['avg-neigh-degree'] = avg_neighbour_degree[name]
        G.node[name]['redundancy'] = redundancy[name]
        G.node[name]['load'] = load[name]
        G.node[name]['hits'] = hits[name]
        G.node[name]['vitality'] = vitality[name]

    # communities
    partitions = community.best_partition(G)
    for member, c in partitions.items():
        G.node[member]['community'] = c

    return G
def get_nearest_neighbor_degree(network):
    """
    Calculates the average nearest neighbor degree for each node
    of the given network.

    Parameters
    ----------
    network: a NetworkX graph object

    Returns
    -------
    degrees: list-like
        an array of node degrees
    nearest_neighbor_degrees: list-like
        an array of node average nearest neighbor degrees,
        in the same order as degrees
    """
    degrees = []
    nearest_neighbor_degrees = []
    nodes = network.nodes()
    nn_dictionary = nx.average_neighbor_degree(network)
    for n in nodes:
        degrees.append(network.degree(n))
        k_nn = nn_dictionary[n]
        nearest_neighbor_degrees.append(k_nn)
    return degrees, nearest_neighbor_degrees
def __init__(self, graph, node_1=None, node_2=None):
    self.graph = graph
    self.node_1 = node_1
    self.node_2 = node_2
    self.clustering_dict = nx.clustering(graph)
    self.betweenness_dict = nx.betweenness_centrality(graph)
    self.average_neighbor_degree_dict = nx.average_neighbor_degree(graph)
    self.attributes_map = {
        "adamic_adar_similarity": self.adamic_adar_similarity,
        "average_clustering_coefficient": self.average_clustering_coefficient,
        "average_neighbor_degree_sum": self.average_neighbor_degree_sum,
        "betweenness_centrality": self.betweenness_centrality,
        "closeness_centrality_sum": self.closeness_centrality_sum,
        "clustering_coefficient_sum": self.clustering_coefficient_sum,
        "common_neighbors": self.common_neighbors,
        "cosine": self.cosine,
        "jaccard_coefficient": self.jaccard_coefficient,
        "katz_measure": self.katz_measure,
        "preferential_attachment": self.preferential_attachment,
        "square_clustering_coefficient_sum": self.square_clustering_coefficient_sum,
        "sum_of_neighbors": self.sum_of_neighbors,
        "sum_of_papers": self.sum_of_papers,
        "get_shortest_path_length": self.get_shortest_path_length,
        "get_second_shortest_path_length": self.get_second_shortest_path_length
    }
    if self.node_1 is not None and self.node_2 is not None:
        self.neighbors_1 = self.all_neighbors(self.node_1)
        self.neighbors_2 = self.all_neighbors(self.node_2)
def generate_feature(self):
    mydata = genfromtxt("graph/" + self.filename, delimiter=',')
    adjacency = mydata[1:, :]
    G = nx.from_numpy_matrix(adjacency, create_using=nx.DiGraph())

    # Initialize data set
    data = np.array([[]])
    k = []
    v = []

    # wCC: clustering coefficient
    wCC_dict = nx.clustering(G)
    wCC_k = ['wCC_' + str(x) for x in list(wCC_dict.keys())]
    wCC_v = list(wCC_dict.values())

    # wAND: average neighbor degree
    wAND_dict = nx.average_neighbor_degree(G)
    wAND_k = ['wAND_' + str(x) for x in list(wAND_dict.keys())]
    wAND_v = list(wAND_dict.values())

    # wNBC: node betweenness centrality
    wNBC_dict = nx.betweenness_centrality(G)
    wNBC_k = ['wNBC_' + str(x) for x in list(wNBC_dict.keys())]  # prefix was mislabeled 'wAND_'
    wNBC_v = list(wNBC_dict.values())

    # Merge
    k = wNBC_k + wAND_k + wCC_k
    v = wNBC_v + wAND_v + wCC_v

    # Insert
    data = np.append(data, [k], axis=1)
    data = np.append(data, [v], axis=0)
    return data
def get_initial_proj_nodes_by_degrees(G, number):
    """
    Function to decide which nodes would be in the initial embedding by highest degree.
    :param G: Our graph
    :param number: Controls number of nodes in the initial projection
    :return: A list of the nodes that are in the initial projection
    """
    nodes = list(G.nodes())
    # a dictionary of the nodes and their degrees
    dict_degrees = dict(G.degree(G.nodes()))
    # a dictionary of the nodes and their average neighbor degrees
    dict_avg_neighbor_deg = nx.average_neighbor_degree(G)
    # sort the dictionary
    sort_degrees = sorted(dict_degrees.items(), key=lambda pw: (pw[1], pw[0]))
    sort_degrees.reverse()
    new_dict_degrees = {}
    for i in range(len(sort_degrees)):
        new_dict_degrees.update({sort_degrees[i][0]: i})
    sort_avg_n_d = sorted(dict_avg_neighbor_deg.items(), key=lambda pw: (pw[1], pw[0]))
    sort_avg_n_d.reverse()
    new_dict_avg_degrees = {}
    for i in range(len(sort_avg_n_d)):
        new_dict_avg_degrees.update({sort_avg_n_d[i][0]: i})
    new_dict = {}
    for node in nodes:
        new_dict.update(
            {node: new_dict_degrees[node] + new_dict_avg_degrees[node]})
    x = {k: v for k, v in sorted(new_dict.items(), key=lambda item: item[1])}
    initial_nodes = []
    keys = list(x.keys())
    for i in range(number):
        initial_nodes.append(keys[i])
    return initial_nodes
def save_metrics_random():
    global density, clustering, assortativity, initial_edges, impact, degree, avg_neigh_degree
    global ranked_degrees, ranked_nodes, degree_final, node_final
    new_edges = list(REDS.edges())
    # list comprehension keeps the Python 2 filter() semantics under Python 3
    intersect = [[x for x in sublist if x in new_edges]
                 for sublist in initial_edges]
    impact.append(len(new_edges) - len(intersect))
    density.append(round(nx.density(REDS), 2))
    clustering.append(round(nx.average_clustering(REDS), 2))
    assortativity.append(round(nx.degree_assortativity_coefficient(REDS), 2))
    degree.append(REDS.degree(moving))
    # average_neighbor_degree returns a one-entry dict for the single node
    avg_neigh_degree.append(
        round(list(nx.average_neighbor_degree(REDS, nodes=[moving]).values())[0], 2))
    degree_set = []
    node_name = []
    for i in REDS.nodes():
        degree_set.append(REDS.degree(i))
        node_name.append(i)
    degree_final.append(degree_set)
    node_final.append(node_name)
    degree_set, node_name = zip(*sorted(zip(degree_set, node_name)))
    ranked_degrees.append(degree_set)
    ranked_nodes.append(node_name)
def sort_adjacency(g, a, attr):
    node_k1 = dict(g.degree())               # sort by degree
    node_k2 = nx.average_neighbor_degree(g)  # then by average neighbor degree
    node_closeness = nx.closeness_centrality(g)
    node_betweenness = nx.betweenness_centrality(g)

    node_sorting = list()
    for node_id in g.nodes():
        node_sorting.append((node_id, node_k1[node_id], node_k2[node_id],
                             node_closeness[node_id], node_betweenness[node_id]))
    node_descending = sorted(node_sorting, key=lambda x: (x[1], x[2], x[3], x[4]), reverse=True)

    mapping = dict()
    for i, node in enumerate(node_descending):
        mapping[node[0]] = i
        # switch node attributes according to the sorting
        temp = attr[node[0]]
        attr[node[0]] = attr[i]
        attr[i] = temp
    # switch graph node ids according to the sorting
    a = nx.adjacency_matrix(g, nodelist=mapping.keys()).todense()
    return g, a, attr
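# A minimal smoke test for sort_adjacency above (a sketch; the graph and
# attribute list are placeholders chosen here, not taken from the original
# code). Nodes of karate_club_graph are the integers 0..33, so they can
# index the attr list directly.
import networkx as nx

g = nx.karate_club_graph()
a = nx.adjacency_matrix(g).todense()
attr = [[n] for n in g.nodes()]   # one attribute row per node
g, a, attr = sort_adjacency(g, a, attr)
print(a.shape)                    # (34, 34), rows in sorted node order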
def get_avg_node_degree(G):
    # Despite the name, this collects the *average neighbor degree* per node.
    avg_node_degree = nx.average_neighbor_degree(G)
    avg_node_degree_list = list(avg_node_degree.values())
    name = f'AvgNodeDegree_{G.name}'
    return {
        name: avg_node_degree_list,
    }
def average_neighbor_degree(gnx, f, ft):
    start = timer.start(ft, 'average_neighbor_degree')
    average_neighbor_degree_dict = nx.average_neighbor_degree(gnx)
    timer.stop(ft, start)
    for k in average_neighbor_degree_dict:
        f.writelines(str(k) + ',' + str(average_neighbor_degree_dict[k]) + '\n')
    return average_neighbor_degree_dict
def metrics_report(g: nx.Graph):
    C = nx.average_clustering(g)
    knn = np.mean(list(nx.average_neighbor_degree(g).values()))
    k = np.mean(list(dict(g.degree).values()))
    E = g.number_of_edges()
    N = g.number_of_nodes()
    l = nx.average_shortest_path_length(g)
    return pd.DataFrame(data={'C': C, 'k_nn': knn, 'k': k, 'E': E, 'N': N, 'l': l}, index=[0])
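# A quick usage check for metrics_report above (a sketch, assuming networkx,
# numpy, and pandas are available). The karate club graph is connected, so
# average_shortest_path_length is well defined.
import networkx as nx

print(metrics_report(nx.karate_club_graph()))
# one row with columns C, k_nn, k, E (=78), N (=34), l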
def test_degree_p4(self):
    G = nx.path_graph(4)
    answer = {0: 2, 1: 1.5, 2: 1.5, 3: 2}
    nd = nx.average_neighbor_degree(G)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert_equal(nd, answer)

    # average_neighbor_out_degree / average_neighbor_in_degree existed only in
    # older NetworkX releases; later versions of this test (below) use
    # average_neighbor_degree with source/target instead.
    D = G.to_directed()
    nd = nx.average_neighbor_out_degree(D)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_in_degree(D)
    assert_equal(nd, answer)
def test_degree_p4(self):
    G = nx.path_graph(4)
    answer = {0: 2, 1: 1.5, 2: 1.5, 3: 2}
    nd = nx.average_neighbor_degree(G)
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, source="in", target="in")
    assert nd == answer
def test_degree_p4(self):
    G = nx.path_graph(4)
    answer = {0: 2, 1: 1.5, 2: 1.5, 3: 2}
    nd = nx.average_neighbor_degree(G)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, source='in', target='in')
    assert_equal(nd, answer)
def test_degree_k4(self):
    G = nx.complete_graph(4)
    answer = {0: 3, 1: 3, 2: 3, 3: 3}
    nd = nx.average_neighbor_degree(G)
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert nd == answer

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, source="in", target="in")
    assert nd == answer
def average_neighbor_degree(self, node):
    # same caching technique as in self.clustering_coefficient
    # might also break for very large graphs
    # nx.average_neighbor_degree(self.graph, nodes=node) might be the way to go
    if not hasattr(self, 'all_average_neighbor_degrees'):
        self.all_average_neighbor_degrees = nx.average_neighbor_degree(self.graph)
    return self.all_average_neighbor_degrees[node]
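# The per-node alternative mentioned in the comment above, as a sketch:
# nodes= restricts the computation to the given nodes, trading the one-off
# cost (and memory) of caching the full dictionary for repeated smaller
# calls. Note that nodes= expects a container and the call returns a dict.
import networkx as nx

def average_neighbor_degree_uncached(graph, node):
    return nx.average_neighbor_degree(graph, nodes=[node])[node]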
def test_degree_k4(self):
    G = nx.complete_graph(4)
    answer = {0: 3, 1: 3, 2: 3, 3: 3}
    nd = nx.average_neighbor_degree(G)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, source='in', target='in')
    assert_equal(nd, answer)
def test_degree_k4(self):
    G = nx.complete_graph(4)
    answer = {0: 3, 1: 3, 2: 3, 3: 3}
    nd = nx.average_neighbor_degree(G)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_out_degree(D)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_in_degree(D)
    assert_equal(nd, answer)
def clustering_analys(DF_adj, re_type):
    # Helper for experimenting with parameters; re_type selects which metric to return.
    labels = list(DF_adj.index)
    # print(DF_adj_1, DF_adj)
    # Network graph
    G = nx.Graph()
    G_i = nx.DiGraph()
    G.add_nodes_from(labels)
    G_i.add_nodes_from(labels)
    # Connect nodes
    for i in range(DF_adj.shape[0]):
        col_label = DF_adj.columns[i]
        for j in range(DF_adj.shape[1]):
            row_label = DF_adj.index[j]
            node = DF_adj.iloc[i, j]
            if node != 0:
                # print(node, DF_adj[labels[i]][labels[j]])
                G.add_edge(col_label, row_label, weight=node)
                G_i.add_edge(col_label, row_label, weight=node)
    if re_type == 1:
        return dict_avg(nx.clustering(G))  # averaged; works for teams or individual players
    elif re_type == 2:
        L = nx.normalized_laplacian_matrix(G)
        e = np.linalg.eigvals(L.A)
        # print("Largest eigenvalue:", max(e))  # characterizes the peer network
        return max(e)
    elif re_type == 3:
        return nx.algebraic_connectivity(G)
    elif re_type == 4:
        return nx.reciprocity(G_i)
    elif re_type == 5:
        return nx.transitivity(G_i)
    elif re_type == 6:
        return dict_max(nx.in_degree_centrality(G_i))
    elif re_type == 7:
        return dict_max(nx.out_degree_centrality(G_i))
    elif re_type == 8:
        try:
            return dict_avg(nx.pagerank(G, alpha=0.9))
        except:
            return 0.01
    elif re_type == 9:
        try:
            return dict_avg(nx.eigenvector_centrality(G))
        except:
            return 0.25
    elif re_type == 10:
        return dict_avg(nx.average_neighbor_degree(G_i))
    print("-----------------")
    print(nx.closeness_centrality(G))  # identifies star players
    print("-----------------")
    print(nx.pagerank(G, alpha=0.9))  # rates players
    print("-----------------")
    print(nx.eigenvector_centrality(G))  # rates players
    print("-----------------")
    print()  # overall connectivity
    print("-----------------")
def test_degree_p4_weighted(self):
    G = nx.path_graph(4)
    G[1][2]['weight'] = 4
    answer = {0: 2, 1: 1.8, 2: 1.8, 3: 2}
    # weighted=True reflects an older NetworkX API; later versions of this
    # test use weight='weight'
    nd = nx.average_neighbor_degree(G, weighted=True)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, weighted=True)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_out_degree(D, weighted=True)
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_in_degree(D, weighted=True)
    assert_equal(nd, answer)
def test_degree_p4_weighted(self):
    G = nx.path_graph(4)
    G[1][2]['weight'] = 4
    answer = {0: 2, 1: 1.8, 2: 1.8, 3: 2}
    nd = nx.average_neighbor_degree(G, weight='weight')
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_degree(D, weight='weight')
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_out_degree(D, weight='weight')
    assert_equal(nd, answer)

    D = G.to_directed()
    nd = nx.average_neighbor_in_degree(D, weight='weight')
    assert_equal(nd, answer)
def descriptives(G, grouping=None):
    degree = nx.degree_histogram(G)
    plt.bar(x=range(len(degree)), height=degree)
    plt.savefig('images/degree_hist.png')
    plt.close()

    neighbor_degree = nx.average_neighbor_degree(G)
    dict_to_hist(neighbor_degree, 'neighbor_degree')

    degree_conn = nx.average_degree_connectivity(G)
    dict_to_hist(degree_conn, 'degree_conn')
def get_degree_correlation(g, method='average', mode='both'):
    """
    The average neighbor degree/in-degree/out-degree distribution grouped by degree.
    Similar to a histogram: for each possible degree k, gives the average/median
    average-neighbor-degree of nodes with degree k in graph g.

    Parameters:
    -----------
    g: NetworkX Graph
    method: str, ('average', 'median'), (default = 'average')
    mode: str, ('in', 'out', 'both'), (default = 'both')

    Returns:
    --------
    xdata, ydata, a 2-tuple of arrays, (k, <Knn>(k)), where <Knn>(k) denotes
    the average/median neighbor degree

    Note: this is Python 2 code (nodes_iter, iteritems, ifilter); it could be
    re-implemented with nx.average_degree_connectivity.
    """
    if mode == 'both':
        d = g.degree()
        k = nx.average_neighbor_degree(g)
    elif mode == 'in':
        d = g.in_degree()
        k = nx.average_neighbor_degree(g, source='in', target='in')
    elif mode == 'out':
        d = g.out_degree()
        k = nx.average_neighbor_degree(g, source='out', target='out')
    else:
        raise NameError("mode must be 'in', 'out', or 'both'")

    # group the nodes by degree
    ck = defaultdict(list)
    for n in g.nodes_iter():
        ck[d[n]].append(k[n])

    xdata, ydata = list(), list()
    if method == 'average':
        for x, y in ifilter(lambda x: x[0] > 0 and average(x[1]) > 0, ck.iteritems()):
            xdata.append(x)
            ydata.append(average(y))
    elif method == 'median':
        for x, y in ifilter(lambda x: x[0] > 0 and median(x[1]) > 0, ck.iteritems()):
            xdata.append(x)
            ydata.append(median(y))
    else:
        raise NameError("method must be 'average' or 'median'")

    xdata = array(xdata)
    ydata = array(ydata)
    return (xdata, ydata)
def average_neighbor_degree(self, G, name):
    """
    Analyze the average neighbor degree of graph G
    (computed on the largest connected component).
    :param G:
    :param name:
    :return:
    """
    if type(G) == nx.DiGraph:
        # For a directed graph, take the largest weakly connected subgraph
        # and sum the weights of reciprocal edges.
        G = G.subgraph(max(nx.weakly_connected_components(G), key=len))
        G1 = G.to_undirected(reciprocal=False)  # keep an edge that exists in either direction
        G2 = G.to_undirected(reciprocal=True)   # keep only reciprocal (mutual) edges
        for edge in G2.edges:
            if edge[0] != edge[1]:
                G1.add_edge(
                    edge[0], edge[1],
                    weight=G.get_edge_data(edge[0], edge[1])['weight']
                    + G.get_edge_data(edge[1], edge[0])['weight'])
        G = G1
    else:
        # For an undirected graph, take the largest connected subgraph directly.
        G = G.subgraph(max(nx.connected_components(G), key=len))
    print("%s: %s nodes, %s edges" % (name, len(G.nodes), len(G.edges)))

    degrees = defaultdict(int)  # number of nodes with each degree
    avg_avg_neighbor_degree = defaultdict(int)
    avg_avg_weighted_neighbor_degree = defaultdict(float)
    avg_neighbor_degree = nx.average_neighbor_degree(G)
    avg_weighted_neighbor_degree = nx.average_neighbor_degree(G, weight="weight")
    for node in G.nodes:
        d = G.degree[node]
        degrees[d] += 1
        avg_avg_neighbor_degree[d] += avg_neighbor_degree[node]
        avg_avg_weighted_neighbor_degree[d] += avg_weighted_neighbor_degree[node]
    for d in degrees:
        avg_avg_neighbor_degree[d] /= degrees[d]
        avg_avg_weighted_neighbor_degree[d] /= degrees[d]
    np.save(name + ".neighbor_degree.npy",
            np.array(sorted(avg_avg_neighbor_degree.items())))
    np.save(name + ".weighted_neighbor_degree.npy",
            np.array(sorted(avg_avg_weighted_neighbor_degree.items())))
def describe_graph(G):
    """Graph description"""
    # GRAPH DESCRIPTION
    graph_desc = pd.Series()
    # n. nodes
    graph_desc["number_of_nodes"] = G.number_of_nodes()
    # n. edges
    graph_desc["number_of_edges"] = G.number_of_edges()
    # n. of selfloops
    graph_desc["number_of_selfloops"] = len(G.selfloop_edges())
    # average shortest path length
    graph_desc["average_shortest_path_length"] = nx.average_shortest_path_length(G)
    # connectivity
    # graph_desc.append(pd.Series(nx.degree_assortativity_coefficient(G), name="degree_assortativity_coefficient"))
    graph_desc["degree_pearson_correlation_coefficient"] = nx.degree_pearson_correlation_coefficient(G)

    # NODE DESCRIPTION
    node_desc = list()
    # n. of neighbours
    node_desc.append(pd.Series(G.degree(), name="degree"))
    node_desc.append(pd.Series(nx.average_neighbor_degree(G), name="average_neighbor_degree"))
    # incoming edges per node
    outgoing = pd.Series(G.in_degree(), name="in_degree")
    node_desc.append(outgoing)
    # outgoing edges per node
    incoming = pd.Series(G.out_degree(), name="out_degree")
    node_desc.append(incoming)
    # fold change out/in
    ratio = np.log2(outgoing + 1) - np.log2(incoming + 1)
    node_desc.append(pd.Series(ratio, name="out_in_degree_fold_change"))
    # centrality
    # degree based
    node_desc.append(pd.Series(nx.degree_centrality(G), name="degree_centrality"))
    node_desc.append(pd.Series(nx.in_degree_centrality(G), name="in_degree_centrality"))
    node_desc.append(pd.Series(nx.out_degree_centrality(G), name="out_degree_centrality"))
    # shortest-path based
    # node_desc.append(pd.Series(nx.closeness_centrality(G), name="closeness_centrality"))
    # node_desc.append(pd.Series(nx.betweenness_centrality(G), name="betweenness_centrality"))
    # # eigenvector-based
    # node_desc.append(pd.Series(nx.eigenvector_centrality(G), name="eigenvector_centrality"))
    # node_desc.append(pd.Series(nx.katz_centrality_numpy(G), name="katz_centrality"))
    # # load-based
    # node_desc.append(pd.Series(nx.load_centrality(G), name="load_centrality"))

    return (graph_desc, pd.DataFrame(node_desc).T)
def analyze_graphs(graphs, days):
    undirected_graphs = list(map(lambda G: G.to_undirected(), graphs))
    graph_days = dict(zip(undirected_graphs, days))
    connected_graphs = list(filter(lambda G: nx.is_connected(G), undirected_graphs))
    connected_days = dict(zip(connected_graphs,
                              list(map(lambda G: graph_days[G], connected_graphs))))
    # dict .values() are wrapped in list(...) so np.mean works under Python 3;
    # average_neighbor_degree is averaged so it can be plotted like the other metrics
    metrics = {
        #"average_shortest_path_lengths": [lambda G: nx.average_shortest_path_length(G), connected_graphs, connected_days],
        "clustering": [lambda G: nx.average_clustering(G), undirected_graphs, graph_days],
        "average_neighbor_degree": [lambda G: np.mean(list(nx.average_neighbor_degree(G).values())), graphs, graph_days],
        "min_weighted_vertex_cover": [lambda G: len(min_weighted_vertex_cover(G)), undirected_graphs, graph_days],
        #"eccentricity": [lambda G: np.mean(list(nx.eccentricity(G).values())), connected_graphs, connected_days],
        #"diameter": [lambda G: nx.diameter(G), connected_graphs, connected_days],
        #"periphery": [lambda G: len(nx.periphery(G)), connected_graphs, connected_days],
        "degree_centralities": [lambda G: np.mean(list(nx.degree_centrality(G).values())), graphs, graph_days],
        "in_degree_centralities": [lambda G: np.mean(list(nx.in_degree_centrality(G).values())), graphs, graph_days],
        "out_degree_centralities": [lambda G: np.mean(list(nx.out_degree_centrality(G).values())), graphs, graph_days],
        "closeness_centralities": [lambda G: np.mean(list(nx.closeness_centrality(G).values())), graphs, graph_days],
        "betweenness_centralities": [lambda G: np.mean(list(nx.betweenness_centrality(G).values())), graphs, graph_days]
    }
    for metric in metrics:
        print("Analyzing {}...".format(metric))
        function = metrics[metric][0]
        which_graphs = metrics[metric][1]
        which_days = list(metrics[metric][2].values())
        yArray = list(map(function, which_graphs))
        print(which_days)
        print(yArray)
        plt.plot(which_days, yArray)
        plt.xlabel("Day")
        plt.ylabel(metric)
        plt.title("{} Over Time".format(metric))
        plt.savefig("{}_VS_Time.png".format(metric))
        plt.close()
def node_analysis(G, rule):
    if rule == 'degree':
        return nx.degree(G)
    elif rule == 'clustering':
        return nx.clustering(G)
    elif rule == 'closeness' or rule == 'centrality':
        return nx.closeness_centrality(G)
    elif rule == 'betweeness':
        return nx.betweenness_centrality(G)
    elif rule == 'average neighbor degree':
        return nx.average_neighbor_degree(G)
    elif rule == 'component':
        comp = nx.connected_components(G)
        components = {}
        for i, c in enumerate(comp):
            for node in c:
                components[node] = i
        return components
    else:
        print("Node assignment rule {0} not recognized.".format(rule))
        sys.exit()
def info_network(G):
    from networkx.algorithms import bipartite
    from decimal import Decimal

    print(G.number_of_nodes())
    print(G.number_of_edges())

    print("average_neighbor_degree")
    avg_nbr_deg = nx.average_neighbor_degree(G)  # renamed from `dict` to avoid shadowing the builtin
    list1 = avg_nbr_deg.keys()
    list2 = avg_nbr_deg.values()
    print(list1)
    print(list2)

    print("degree_assortativity_coefficient")
    print(nx.degree_assortativity_coefficient(G))
    print("degree_pearson_correlation_coefficient")
    print(nx.degree_pearson_correlation_coefficient(G))
    # print(nx.k_nearest_neighbors(G))
    print("STOP HERE")

    print("bipartite.closeness_centrality(G, G.node)")
    dict2 = bipartite.closeness_centrality(G, G.node)
    list3 = dict2.values()
    print(list3)

    print("nx.degree_centrality(G)")
    dict3 = nx.degree_centrality(G)
    list4 = dict3.values()
    print(list4)

    print("nx.betweenness_centrality(G)")
    dict4 = nx.betweenness_centrality(G)
    list5 = dict4.values()
    print(list5)

    print("hits_numpy")
    dict5 = nx.hits_numpy(G)
    print(dict5)
def save_metrics():
    global density, clustering, assortativity, initial_edges, impact, degree, avg_neigh_degree
    global ranked_nodes, ranked_degrees, degree_final, node_final, moving
    new_edges = list(RGG.edges())
    # list comprehension keeps the Python 2 filter() semantics under Python 3
    intersect = [[x for x in sublist if x in new_edges]
                 for sublist in initial_edges]
    impact.append(len(new_edges) - len(intersect))
    density.append(round(nx.density(RGG), 2))
    clustering.append(round(nx.average_clustering(RGG), 2))
    assortativity.append(round(nx.degree_assortativity_coefficient(RGG), 2))
    degree.append(RGG.degree(moving))
    # average_neighbor_degree returns a one-entry dict for the single node
    avg_neigh_degree.append(
        round(list(nx.average_neighbor_degree(RGG, nodes=[moving]).values())[0], 2))
    degree_set = []
    node_name = []
    for i in RGG.nodes():
        degree_set.append(RGG.degree(i))
        node_name.append(i)
    degree_final.append(degree_set)
    node_final.append(node_name)
    degree_set, node_name = zip(*sorted(zip(degree_set, node_name)))
    ranked_degrees.append(degree_set)
    ranked_nodes.append(node_name)
def __init__(self, graph, feature_list=[]):
    self.no_feature = 39
    self.G = graph
    self.nodes = nx.number_of_nodes(self.G)
    self.edges = nx.number_of_edges(self.G)
    self.Lap = nx.normalized_laplacian_matrix(self.G)
    # ??? how to check whether comparable, addable?
    self.eigvals = numpy.linalg.eigvals(self.Lap.A).tolist()
    try:
        self.radius = nx.radius(self.G)
    except nx.exception.NetworkXError:
        self.radius = "ND"
    try:
        self.ecc_dic = nx.eccentricity(self.G)
    except nx.exception.NetworkXError:
        self.ecc_dic = {}
    self.degree_dic = nx.average_neighbor_degree(self.G)
    self.pagerank = nx.pagerank(self.G).values()
    if feature_list == []:
        self.feature_list = list(range(1, self.no_feature + 1))
    else:
        self.feature_list = feature_list
    self.feature_vector = []
    self.feature_time = []
def upload_file(request):
    f = request.FILES['ds']

    # little bit of hacking... try JSON first, fall back to CSV
    format_type = 'json'
    try:
        ds = json.load(f)
    except:
        rows = csv.reader(f)
        ds = list()
        names = list()
        for row in rows:
            if len(names) == 0:
                for v in row:
                    names.append(v)
            else:
                idx = 0
                cur = dict()
                for v in row:
                    cur[names[idx]] = v
                    idx += 1
                ds.append(cur)
        format_type = 'csv'

    # Create network
    G = nx.Graph()

    # create date-centered / random id. not guaranteed to be unique. TODO change for scale.
    now = datetime.datetime.now()
    ds_id = "%d%d%d%d%d%d.%d" % (now.year, now.month, now.day, now.hour,
                                 now.minute, now.second, random.randint(0, 100000))

    # known formats.
    # based on collab2008.json
    if (type(ds) == type(dict()) and 'links' in ds and 'nodes' in ds
            and len(ds['links']) > 0 and len(ds['nodes']) > 0):
        idx = 0
        for node in ds['nodes']:
            G.add_node(idx, country_code=node['id'])
            idx += 1
        for link in ds['links']:
            G.add_edge(link['source'], link['target'], weight=link['weight'])
    # based on elena's airbnb data, formatted as csv, with columns:
    # ego_name, ego_lat, ego_lng, alter_name, alter_lat, alter_lng, weight
    elif (type(ds) == type(list()) and type(ds[0]) == type(dict())
            and 'ego_name' in ds[0] and 'alter_name' in ds[0]):
        node_names = set()
        name_to_ll = dict()
        for d in ds:
            node_names.add(d['ego_name'])
            node_names.add(d['alter_name'])
            name_to_ll[d['ego_name']] = {'lat': d['ego_lat'], 'lng': d['ego_lng']}
            name_to_ll[d['alter_name']] = {'lat': d['alter_lat'], 'lng': d['alter_lng']}
        nodemap = dict()
        idx = 0
        for node_name in node_names:
            if node_name not in nodemap:
                G.add_node(idx, name=node_name,
                           lat=name_to_ll[node_name]['lat'],
                           lng=name_to_ll[node_name]['lng'])
                nodemap[node_name] = idx
                idx += 1
        for d in ds:
            G.add_edge(nodemap[d['ego_name']], nodemap[d['alter_name']],
                       weight=int(d['weight']))
    else:
        return "ERROR_UNKNOWN_FORMAT"

    # Make sure that *every node* has a lat/lng
    no_geo = []  # maintain list of nodes removed
    ccode_to_ll = pickle.load(open('DATASETS/code_to_latlng.pkl', 'r'))
    for idx in G.node:
        # todo use HttpResponseBadRequest if no lat/lng exists
        if 'lat' not in G.node[idx] or 'lng' not in G.node[idx]:
            if 'country_code' in G.node[idx] and G.node[idx]['country_code'] in ccode_to_ll:
                c = G.node[idx]['country_code']
                G.node[idx]['lat'] = ccode_to_ll[c]['lat']
                G.node[idx]['lng'] = ccode_to_ll[c]['lng']
            else:
                no_geo.append(idx)
    # remove nodes with missing geo info
    for idx in no_geo:
        G.remove_node(idx)

    # Add *EXTRA* data. Not always guaranteed to be returned.
    ctor = pickle.load(open('DATASETS/country_to_continent.pkl', 'r'))
    code_to_country = pickle.load(open('DATASETS/code_to_country.pkl', 'r'))
    pp = pprint.PrettyPrinter(stream=sys.stderr)
    #pp.pprint(G.nodes(data=True))
    #pp.pprint(G.edges(data=True))
    closeness_vitality = nx.closeness_vitality(G)
    pagerank = nx.pagerank(G)
    degree_centrality = nx.degree_centrality(G)
    average_neighbor_degree = nx.average_neighbor_degree(G)
    for idx in G.node:
        if 'country_code' in G.node[idx] and G.node[idx]['country_code'] in ctor:
            G.node[idx]['region'] = ctor[G.node[idx]['country_code']]
        else:
            G.node[idx]['region'] = 'Unknown'
        if 'country_code' in G.node[idx] and G.node[idx]['country_code'] in code_to_country:
            G.node[idx]['country_name'] = code_to_country[G.node[idx]['country_code']]
        else:
            G.node[idx]['country_name'] = 'Unknown'
        G.node[idx]['closeness_vitality'] = closeness_vitality[idx]
        G.node[idx]['pagerank'] = pagerank[idx]
        G.node[idx]['degree'] = G.degree(idx)
        G.node[idx]['degree_centrality'] = degree_centrality[idx]
        G.node[idx]['average_neighbor_degree'] = average_neighbor_degree[idx]
        G.node[idx]['weight'] = G.degree(idx, 'weight')
        name = "Location: %.2f,%.2f" % (float(G.node[idx]['lat']), float(G.node[idx]['lng']))
        if 'name' in G.node[idx]:
            name += " (%s)" % G.node[idx]['name']
        elif 'country_name' in G.node[idx] and G.node[idx]['country_name'] != "Unknown":
            name += " (%s)" % G.node[idx]['country_name']
        G.node[idx]['name'] = name

    f = open("DATASETS/graph%s.pickle" % ds_id, 'w')
    pickle.dump(G, f)
    print >>sys.stderr, "UPLOAD COMPLETE. %d NODES IGNORED DUE TO MISSING GEO DATA." % len(no_geo)
    return ds_id
def parallelProperties(name):
    print name
    # creating a multigraph
    MG = nx.MultiGraph()
    # reading file and adding nodes - edges
    if pathToFiles != None:
        file = open(pathToFiles + "/" + name, "r")
    else:
        file = open("./" + name, "r")
    listOfInteractions = []  # interactions saved to rebuild the directed graph later
    for line in file:
        splittedLine = line.split("\t")
        node1 = splittedLine[0]
        node2 = splittedLine[1]
        listOfInteractions.append(node1 + ":" + node2)
        MG.add_edge(node1, node2)
    file.close()

    # dict to save measures
    dictProp = {}
    for node in MG.nodes():
        dictProp[node] = {"average_shortest_path_length": '',
                          "clustering_coefficient": '0',
                          "closeness_centrality": '',
                          "eccentricity": '',
                          "stress": '0',
                          "edge_count": '',
                          "In_degree": '0',
                          "Out_degree": '0',
                          "Betweenness_centrality": '',
                          "Neighborhood_conectivity": ''}

    if pathToFiles != None:
        file = open(pathToFiles + "/" + name, "r")
    else:
        file = open("./" + name, "r")

    # in degree and out degree
    for line in file:
        splittedLine = line.split("\t")
        node1 = splittedLine[0]
        node2 = splittedLine[1]
        dictProp[node1]["Out_degree"] = str(int(dictProp[node1]["Out_degree"]) + 1)
        dictProp[node2]["In_degree"] = str(int(dictProp[node2]["In_degree"]) + 1)
    file.close()

    # look at the connected subgraphs, rebuilding each one as a multidigraph
    for subG in list(nx.connected_component_subgraphs(MG)):
        # first step: create a multidigraph
        md = nx.MultiDiGraph()
        whitoutSL = nx.MultiGraph()        # a graph without self-loops
        directed = nx.DiGraph()
        MDNoSelfLoop = nx.MultiDiGraph()   # a multidigraph without self-loops
        # second step: loop over the edges, searching for the direction of interaction
        for edge in nx.edges(subG):
            nodeX, nodeY = edge
            # if it is a self-interaction
            if nodeX == nodeY:
                md.add_edge(nodeX, nodeY)
                directed.add_edge(nodeX, nodeY)
            else:
                # if it is not a self-interaction, look for the directions
                # (whether A:B and/or B:A exist) and add the corresponding edges
                if nodeX + ":" + nodeY in listOfInteractions:
                    md.add_edge(nodeX, nodeY)
                    directed.add_edge(nodeX, nodeY)
                    whitoutSL.add_edge(nodeX, nodeY)
                    MDNoSelfLoop.add_edge(nodeX, nodeY)
                if nodeY + ":" + nodeX in listOfInteractions:
                    md.add_edge(nodeY, nodeX)
                    whitoutSL.add_edge(nodeY, nodeX)
                    directed.add_edge(nodeY, nodeX)
                    MDNoSelfLoop.add_edge(nodeY, nodeX)

        # Metrics
        for node in md.nodes():
            # edge count
            dictProp[node]["edge_count"] = str(int(dictProp[node]["Out_degree"])
                                               + int(dictProp[node]["In_degree"]))
            # average shortest path length: compute the shortest paths from
            # this node, then average over the reachable nodes
            shortestPaths = nx.shortest_path_length(md, source=node)
            summatory = 0
            cont = 0
            for item in shortestPaths.items():
                summatory += float(item[1])
                cont += 1
            if (cont - 1) != 0:
                dictProp[node]["average_shortest_path_length"] = str(summatory / (cont - 1))
            else:
                dictProp[node]["average_shortest_path_length"] = "0"
            # eccentricity
            higher = 0
            for paths in shortestPaths.items():
                if int(paths[1]) > higher:
                    higher = int(paths[1])
            dictProp[node]["eccentricity"] = str(higher)

        # closeness centrality
        for item in (nx.closeness_centrality(md, normalized=False)).items():
            dictProp[item[0]]["closeness_centrality"] = str(item[1])

        # neighborhood connectivity
        for item in (nx.average_neighbor_degree(whitoutSL)).items():
            dictProp[item[0]]["Neighborhood_conectivity"] = str(item[1])

        # stress centrality
        for Source in md.nodes():
            for Target in md.nodes():
                if Source != Target:
                    try:
                        for path in nx.all_shortest_paths(md, source=Source, target=Target):
                            if len(path) > 2:
                                for N in path[1:-1]:
                                    dictProp[N]["stress"] = str(int(dictProp[N]["stress"]) + 1)
                    except:
                        pass

        # betweenness centrality
        for item in (nx.betweenness_centrality(md)).items():
            dictProp[item[0]]["Betweenness_centrality"] = str(item[1])

        # clustering coefficient
        for node in MDNoSelfLoop.nodes():
            inPlusOut = float(dictProp[node]["Out_degree"]) + float(dictProp[node]["In_degree"])
            division = (len(whitoutSL.neighbors(node)) * (len(whitoutSL.neighbors(node)) - 1))
            if len(whitoutSL.neighbors(node)) > 1:  # if node has at least two neighbours
                connectedNeighbors = 0
                neighbors = whitoutSL.neighbors(node)
                for neighbor in neighbors:
                    neighborsOfNeighbors = MDNoSelfLoop.neighbors(neighbor)
                    for n in neighborsOfNeighbors:
                        if n in neighbors:
                            connectedNeighbors += 1
                dictProp[node]["clustering_coefficient"] = str(float(connectedNeighbors) / division)

    if Result != None:
        outFile = open(Result + "/" + name[:-4] + ".csv", "w")
    else:
        outFile = open("./" + name[:-4] + ".csv", "w")
    outFile.write("\"AverageShortestPathLength\",\"BetweennessCentrality\",\"ClosenessCentrality\","
                  "\"ClusteringCoefficient\",\"Eccentricity\",\"EdgeCount\",\"Indegree\",\"name\","
                  "\"NeighborhoodConnectivity\",\"Outdegree\",\"Stress\"\n")
    for item in dictProp.items():
        node = item[0]
        outFile.write("\"" + dictProp[node]["average_shortest_path_length"] + "\",\""
                      + dictProp[node]["Betweenness_centrality"] + "\",\""
                      + dictProp[node]["closeness_centrality"] + "\",\""
                      + dictProp[node]["clustering_coefficient"] + "\",\""
                      + dictProp[node]["eccentricity"] + "\",\""
                      + dictProp[node]["edge_count"] + "\",\""
                      + dictProp[node]["In_degree"] + "\",\""
                      + node + "\",\""
                      + dictProp[node]["Neighborhood_conectivity"] + "\",\""
                      + dictProp[node]["Out_degree"] + "\",\""
                      + dictProp[node]["stress"] + "\"\n")
    outFile.close()
def extended_stats(G, connectivity=False, anc=False, ecc=False, bc=False, cc=False):
    """
    Calculate extended topological stats and metrics for a graph.

    Many of these algorithms have an inherently high time complexity. Global
    topological analysis of large complex networks is extremely time consuming
    and may exhaust computer memory. Consider using function arguments to not
    run metrics that require computation of a full matrix of paths if they
    will not be needed.

    Parameters
    ----------
    G : networkx multidigraph
    connectivity : bool
        if True, calculate node and edge connectivity
    anc : bool
        if True, calculate average node connectivity
    ecc : bool
        if True, calculate shortest paths, eccentricity, and topological
        metrics that use eccentricity
    bc : bool
        if True, calculate node betweenness centrality
    cc : bool
        if True, calculate node closeness centrality

    Returns
    -------
    stats : dict
        dictionary of network measures containing the following elements (some
        only calculated/returned optionally, based on passed parameters):

          - avg_neighbor_degree
          - avg_neighbor_degree_avg
          - avg_weighted_neighbor_degree
          - avg_weighted_neighbor_degree_avg
          - degree_centrality
          - degree_centrality_avg
          - clustering_coefficient
          - clustering_coefficient_avg
          - clustering_coefficient_weighted
          - clustering_coefficient_weighted_avg
          - pagerank
          - pagerank_max_node
          - pagerank_max
          - pagerank_min_node
          - pagerank_min
          - node_connectivity
          - node_connectivity_avg
          - edge_connectivity
          - eccentricity
          - diameter
          - radius
          - center
          - periphery
          - closeness_centrality
          - closeness_centrality_avg
          - betweenness_centrality
          - betweenness_centrality_avg
    """
    stats = {}
    full_start_time = time.time()

    # create a DiGraph from the MultiDiGraph, for those metrics that require it
    G_dir = nx.DiGraph(G)

    # create an undirected Graph from the MultiDiGraph, for those metrics that
    # require it
    G_undir = nx.Graph(G)

    # get the largest strongly connected component, for those metrics that
    # require strongly connected graphs
    G_strong = get_largest_component(G, strongly=True)

    # average degree of the neighborhood of each node, and average for the graph
    avg_neighbor_degree = nx.average_neighbor_degree(G)
    stats['avg_neighbor_degree'] = avg_neighbor_degree
    stats['avg_neighbor_degree_avg'] = sum(avg_neighbor_degree.values()) / len(avg_neighbor_degree)

    # average weighted degree of the neighborhood of each node, and average for
    # the graph
    avg_weighted_neighbor_degree = nx.average_neighbor_degree(G, weight='length')
    stats['avg_weighted_neighbor_degree'] = avg_weighted_neighbor_degree
    stats['avg_weighted_neighbor_degree_avg'] = sum(avg_weighted_neighbor_degree.values()) / len(avg_weighted_neighbor_degree)

    # degree centrality for a node is the fraction of nodes it is connected to
    degree_centrality = nx.degree_centrality(G)
    stats['degree_centrality'] = degree_centrality
    stats['degree_centrality_avg'] = sum(degree_centrality.values()) / len(degree_centrality)

    # calculate clustering coefficient for the nodes
    stats['clustering_coefficient'] = nx.clustering(G_undir)

    # average clustering coefficient for the graph
    stats['clustering_coefficient_avg'] = nx.average_clustering(G_undir)

    # calculate weighted clustering coefficient for the nodes
    stats['clustering_coefficient_weighted'] = nx.clustering(G_undir, weight='length')

    # average clustering coefficient (weighted) for the graph
    stats['clustering_coefficient_weighted_avg'] = nx.average_clustering(G_undir, weight='length')

    # pagerank: a ranking of the nodes in the graph based on the structure of
    # the incoming links
    pagerank = nx.pagerank(G_dir, weight='length')
    stats['pagerank'] = pagerank

    # node with the highest page rank, and its value
    pagerank_max_node = max(pagerank, key=lambda x: pagerank[x])
    stats['pagerank_max_node'] = pagerank_max_node
    stats['pagerank_max'] = pagerank[pagerank_max_node]

    # node with the lowest page rank, and its value
    pagerank_min_node = min(pagerank, key=lambda x: pagerank[x])
    stats['pagerank_min_node'] = pagerank_min_node
    stats['pagerank_min'] = pagerank[pagerank_min_node]

    # if True, calculate node and edge connectivity
    if connectivity:
        start_time = time.time()

        # node connectivity is the minimum number of nodes that must be removed
        # to disconnect G or render it trivial
        stats['node_connectivity'] = nx.node_connectivity(G_strong)

        # edge connectivity is equal to the minimum number of edges that must
        # be removed to disconnect G or render it trivial
        stats['edge_connectivity'] = nx.edge_connectivity(G_strong)
        log('Calculated node and edge connectivity in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate average node connectivity
    if anc:
        # mean number of internally node-disjoint paths between each pair of
        # nodes in G, i.e., the expected number of nodes that must be removed
        # to disconnect a randomly selected pair of non-adjacent nodes
        start_time = time.time()
        stats['node_connectivity_avg'] = nx.average_node_connectivity(G)
        log('Calculated average node connectivity in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate shortest paths, eccentricity, and topological metrics
    # that use eccentricity
    if ecc:
        # precompute shortest paths between all nodes for eccentricity-based
        # stats
        start_time = time.time()
        sp = {source: dict(nx.single_source_dijkstra_path_length(G_strong, source, weight='length'))
              for source in G_strong.nodes()}
        log('Calculated shortest path lengths in {:,.2f} seconds'.format(time.time() - start_time))

        # eccentricity of a node v is the maximum distance from v to all other
        # nodes in G
        eccentricity = nx.eccentricity(G_strong, sp=sp)
        stats['eccentricity'] = eccentricity

        # diameter is the maximum eccentricity
        diameter = nx.diameter(G_strong, e=eccentricity)
        stats['diameter'] = diameter

        # radius is the minimum eccentricity
        radius = nx.radius(G_strong, e=eccentricity)
        stats['radius'] = radius

        # center is the set of nodes with eccentricity equal to radius
        center = nx.center(G_strong, e=eccentricity)
        stats['center'] = center

        # periphery is the set of nodes with eccentricity equal to the diameter
        periphery = nx.periphery(G_strong, e=eccentricity)
        stats['periphery'] = periphery

    # if True, calculate node closeness centrality
    if cc:
        # closeness centrality of a node is the reciprocal of the sum of the
        # shortest path distances from u to all other nodes
        start_time = time.time()
        closeness_centrality = nx.closeness_centrality(G, distance='length')
        stats['closeness_centrality'] = closeness_centrality
        stats['closeness_centrality_avg'] = sum(closeness_centrality.values()) / len(closeness_centrality)
        log('Calculated closeness centrality in {:,.2f} seconds'.format(time.time() - start_time))

    # if True, calculate node betweenness centrality
    if bc:
        # betweenness centrality of a node is the sum of the fraction of
        # all-pairs shortest paths that pass through node
        start_time = time.time()
        betweenness_centrality = nx.betweenness_centrality(G, weight='length')
        stats['betweenness_centrality'] = betweenness_centrality
        stats['betweenness_centrality_avg'] = sum(betweenness_centrality.values()) / len(betweenness_centrality)
        log('Calculated betweenness centrality in {:,.2f} seconds'.format(time.time() - start_time))

    log('Calculated extended stats in {:,.2f} seconds'.format(time.time() - full_start_time))
    return stats
graph = nx.Graph(G)
graph.remove_edges_from(loops)

# get largest connected component
# unfortunately, the iterator over the components is not guaranteed to be sorted by size
components = sorted(nx.connected_components(graph), key=len, reverse=True)
lcc = graph.subgraph(components[0])

pos = nx.spring_layout(lcc)
d = nx.degree(lcc)
#nx.draw(lcc, nodelist=d.keys(), node_size=[v * 20 for v in d.values()])
#nx.draw_networkx_labels(lcc,pos=nx.spring_layout(lcc))
#plt.show()

# code for histogram
degree_sequence = sorted(nx.degree(G).values(), reverse=True)
dmax = max(degree_sequence)
plt.loglog(degree_sequence, 'b-', marker='o')
plt.title("Degree rank plot")
plt.ylabel("degree")
plt.xlabel("rank")

plt.axes([0.45, 0.45, 0.45, 0.45])
Gcc = sorted(nx.connected_component_subgraphs(G), key=len, reverse=True)[0]
pos = nx.spring_layout(Gcc)
plt.axis('off')
nx.draw_networkx_nodes(Gcc, pos, node_size=20)
nx.draw_networkx_edges(Gcc, pos, alpha=0.4)
plt.savefig("./USA/degree_histogram_usa.png")
plt.show()

print(nx.average_neighbor_degree(G, source='in', target='in'))
def average_neighbor_degree_sum(self):
    # Lazily compute and cache the average neighbor degrees.
    if self.average_neighbor_degree_dict is None:
        self.average_neighbor_degree_dict = nx.average_neighbor_degree(self.graph)
    return (self.average_neighbor_degree_dict[self.node_1]
            + self.average_neighbor_degree_dict[self.node_2])
def has_large_inout_nodes(g, cutoff=5):
    for n in nx.average_neighbor_degree(g).values():
        if n > cutoff:
            return True
    return False
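# A quick check of has_large_inout_nodes above (a sketch): in a star with
# 10 leaves, every leaf's only neighbor is the hub of degree 10 > 5, while
# in a 3-node path no node's neighbors average above 2.
import networkx as nx

assert has_large_inout_nodes(nx.star_graph(10)) is True
assert has_large_inout_nodes(nx.path_graph(3)) is False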
def avg_degree(self):
    '''
    Return the average neighbor degree for each node
    '''
    return nx.average_neighbor_degree(self._graph)
def neighdeg(network):
    return distri(nx.average_neighbor_degree(network).values(), 'neighbor_degree')
G = nx.Graph()
f = open("network", "r")
for line in f:
    fields = line.strip().split()
    G.add_edge(int(fields[0]), int(fields[1]))
f.close()
sys.stderr.write("Data load! Runtime: %s\n" % (time.time() - start))

avg_clusterings = nx.clustering(G)
sys.stderr.write("Clustering calculated! Runtime: %s\n" % (time.time() - start))
neigh_degree = nx.average_neighbor_degree(G)
sys.stderr.write("AVG Neighbor degree calculated! Runtime: %s\n" % (time.time() - start))
bet_centr = nx.betweenness_centrality(G, k=10000)
sys.stderr.write("Betweenness centrality calculated! Runtime: %s\n" % (time.time() - start))
clo_centr = nx.closeness_centrality(G)
sys.stderr.write("Closeness centrality calculated! Runtime: %s\n" % (time.time() - start))

f = open("node_stats_approx", 'w')
for i in G:
    f.write("%d::%s::%s::%s::%s\n" % (i, avg_clusterings[i], neigh_degree[i], bet_centr[i], clo_centr[i]))
f.close()
# remove parallel edges and self-loops
graph = nx.Graph(G)
graph.remove_edges_from(loops)

# get largest connected component
# unfortunately, the iterator over the components is not guaranteed to be sorted by size
components = sorted(nx.connected_components(graph), key=len, reverse=True)
lcc = graph.subgraph(components[0])

pos = nx.spring_layout(lcc)
d = nx.degree(lcc)
# nx.draw(lcc, nodelist=d.keys(), node_size=[v * 20 for v in d.values()])
# nx.draw_networkx_labels(lcc,pos=nx.spring_layout(lcc))
# plt.show()

# code for histogram
degree_sequence = sorted(nx.degree(G).values(), reverse=True)
dmax = max(degree_sequence)
plt.loglog(degree_sequence, "b-", marker="o")
plt.title("Degree rank plot")
plt.ylabel("degree")
plt.xlabel("rank")

plt.axes([0.45, 0.45, 0.45, 0.45])
Gcc = sorted(nx.connected_component_subgraphs(G), key=len, reverse=True)[0]
pos = nx.spring_layout(Gcc)
plt.axis("off")
nx.draw_networkx_nodes(Gcc, pos, node_size=20)
nx.draw_networkx_edges(Gcc, pos, alpha=0.4)
plt.savefig("./USA/degree_histogram_usa.png")
plt.show()

print(nx.average_neighbor_degree(G, source="in", target="in"))
def transversal_measures(self):
    transversal_measures = []
    continuous = False

    # - V(k)
    # all
    degree_volumes = []
    keys = self.degree_distribution.keys()
    for key in keys:
        degree = self.degree_distribution[key]
        volume = self.volume_distribution[key]
        degree_volumes.append([degree, volume])
    V_k = self.Stats.get_dependency(degree_volumes)
    # - getting the aggregate dependency
    V_k_agg = self.Stats.aggregate_distribution(V_k, self.aggregate_number)
    transversal_measures.append(V_k_agg)
    # - adding the sd of the real distribution
    transversal_measures.append(V_k[2])
    # storing KS and R-squared
    self.Stats.kolmogorov_smirnov(V_k[1], V_k_agg[1], continuous)
    self.Stats.r_square([x[0] for x in degree_volumes], [x[1] for x in degree_volumes])

    # in
    in_degree_volumes = []
    keys = self.in_degree_distribution.keys()
    for key in keys:
        in_degree = self.in_degree_distribution[key]
        in_volume = self.in_volume_distribution[key]
        in_degree_volumes.append([in_degree, in_volume])
    V_k_in = self.Stats.get_dependency(in_degree_volumes)
    # - getting the aggregate dependency
    V_k_in_agg = self.Stats.aggregate_distribution(V_k_in, self.aggregate_number)
    transversal_measures.append(V_k_in_agg)
    # - adding the sd of the real distribution
    transversal_measures.append(V_k_in[2])
    # storing KS and R-squared
    self.Stats.kolmogorov_smirnov(V_k_in[1], V_k_in_agg[1], continuous)
    self.Stats.r_square([x[0] for x in in_degree_volumes], [x[1] for x in in_degree_volumes])

    # out
    out_degree_volumes = []
    keys = self.out_degree_distribution.keys()
    for key in keys:
        out_degree = self.out_degree_distribution[key]
        out_volume = self.out_volume_distribution[key]
        out_degree_volumes.append([out_degree, out_volume])
    V_k_out = self.Stats.get_dependency(out_degree_volumes)
    # - getting the aggregate dependency
    V_k_out_agg = self.Stats.aggregate_distribution(V_k_out, self.aggregate_number)
    transversal_measures.append(V_k_out_agg)
    # - adding the sd of the real distribution
    transversal_measures.append(V_k_out[2])
    # storing KS and R-squared
    self.Stats.kolmogorov_smirnov(V_k_out[1], V_k_out_agg[1], continuous)
    self.Stats.r_square([x[0] for x in out_degree_volumes], [x[1] for x in out_degree_volumes])

    # - C(k)
    G_undirected = self.G.to_undirected()
    undirected_degree_distribution = G_undirected.degree()

    # unweighted cluster
    degree_unweighted_clusters = []
    keys = undirected_degree_distribution.keys()
    for key in keys:
        degree = undirected_degree_distribution[key]
        unweighted_cluster = self.unweighted_clustering_distribution[key]
        degree_unweighted_clusters.append([degree, unweighted_cluster])
    C_k_unweighted = self.Stats.get_dependency(degree_unweighted_clusters)
    # - getting the aggregate dependency
    C_k_unweighted_agg = self.Stats.aggregate_distribution(C_k_unweighted, self.aggregate_number)
    transversal_measures.append(C_k_unweighted_agg)
    # - adding the sd of the real distribution
    transversal_measures.append(C_k_unweighted[2])
    # storing KS and R-squared
    self.Stats.kolmogorov_smirnov(C_k_unweighted[1], C_k_unweighted_agg[1], continuous)
    self.Stats.r_square([x[0] for x in degree_unweighted_clusters], [x[1] for x in degree_unweighted_clusters])

    # weighted cluster
    degree_weighted_clusters = []
    # keys = self.degree_distribution.keys()
    for key in keys:
        degree = undirected_degree_distribution[key]
        weighted_cluster = self.weighted_clustering_distribution[key]
        degree_weighted_clusters.append([degree, weighted_cluster])
    C_k_weighted = self.Stats.get_dependency(degree_weighted_clusters)
    # - getting the aggregate dependency
    C_k_weighted_agg = self.Stats.aggregate_distribution(C_k_weighted, self.aggregate_number)
    transversal_measures.append(C_k_weighted_agg)
    # - adding the sd of the real distribution
    transversal_measures.append(C_k_weighted[2])
    # storing KS and R-squared
    self.Stats.kolmogorov_smirnov(C_k_weighted[1], C_k_weighted_agg[1], continuous)
    self.Stats.r_square([x[0] for x in degree_weighted_clusters], [x[1] for x in degree_weighted_clusters])

    # - Vij
    # average weight of links as a function of ki * kj
    edges_volume_degree = []
    for edge in self.G.edges(data=True):
        node1_degree = self.out_degree_distribution[edge[0]]
        node2_degree = self.in_degree_distribution[edge[1]]
        weight = edge[2][self.weight_id]
        edges_volume_degree.append([node1_degree * node2_degree, weight])
    volume_end_point_degree = self.Stats.get_dependency(edges_volume_degree)
    transversal_measures.append(volume_end_point_degree)

    # - Knn
    # unweighted, undirected
    average_neighbor_degrees = nx.average_neighbor_degree(self.G)
    average_neighbor_degree_k = []
    for key in keys:
        degree = undirected_degree_distribution[key]
        average_neighbor_degree = average_neighbor_degrees[key]
        average_neighbor_degree_k.append([degree, average_neighbor_degree])
    average_neighbor_degree_k_dep = self.Stats.get_dependency(average_neighbor_degree_k)
    # adding to the general values
    [average_neighbor_degree_mean, average_neighbor_degree_sd] = self.Stats.get_mean_sd(average_neighbor_degrees)
    self.features[0].append(average_neighbor_degree_mean)
    self.features[0].append(average_neighbor_degree_sd)
    # - getting the aggregate dependency
    average_neighbor_degree_k_agg = self.Stats.aggregate_distribution(average_neighbor_degree_k_dep, self.aggregate_number)
    transversal_measures.append(average_neighbor_degree_k_agg)
    # - adding the sd of the real distribution
    transversal_measures.append(average_neighbor_degree_k_dep[2])
    # - computing the KS and R-squared tests
    self.Stats.kolmogorov_smirnov(average_neighbor_degree_k_dep[1], average_neighbor_degree_k_agg[1], continuous)
    self.Stats.r_square([x[0] for x in average_neighbor_degree_k], [x[1] for x in average_neighbor_degree_k])

    # weighted, undirected
    average_neighbor_degrees_weighted = nx.average_neighbor_degree(self.G, weight=self.weight_id)
    average_neighbor_degree_weighted_k = []
    for key in keys:
        degree = undirected_degree_distribution[key]
        average_neighbor_degree_weighted = average_neighbor_degrees_weighted[key]
        average_neighbor_degree_weighted_k.append([degree, average_neighbor_degree_weighted])
    average_neighbor_degree_weighted_k_dep = self.Stats.get_dependency(average_neighbor_degree_weighted_k)
    # adding to the general values
    [average_neighbor_degree_weighted_mean, average_neighbor_degree_weighted_sd] = self.Stats.get_mean_sd(average_neighbor_degrees_weighted)
    self.features[0].append(average_neighbor_degree_weighted_mean)
    self.features[0].append(average_neighbor_degree_weighted_sd)
    # - getting the aggregate dependency
    average_neighbor_degree_weighted_k_agg = self.Stats.aggregate_distribution(average_neighbor_degree_weighted_k_dep, self.aggregate_number)
    transversal_measures.append(average_neighbor_degree_weighted_k_agg)
    # - adding the sd of the real distribution
    transversal_measures.append(average_neighbor_degree_weighted_k_dep[2])
    # - computing the KS and R-squared tests
    self.Stats.kolmogorov_smirnov(average_neighbor_degree_weighted_k_dep[1], average_neighbor_degree_weighted_k_agg[1], continuous)
    self.Stats.r_square([x[0] for x in average_neighbor_degree_weighted_k], [x[1] for x in average_neighbor_degree_weighted_k])

    self.features.append(transversal_measures)
def test_degree_k4_nodes(self):
    G = nx.complete_graph(4)
    answer = {1: 3.0, 2: 3.0}
    nd = nx.average_neighbor_degree(G, nodes=[1, 2])
    assert_equal(nd, answer)
import csv

import networkx as nx
from networkx.algorithms import bipartite

G = nx.DiGraph()
filename = "TF_analysis.csv"
with open(filename) as csvfile:  # rows 1~300
    for row in csv.reader(csvfile):
        G.add_edges_from([(row[0], row[1])], weight=float(row[2]))  # store weights as numbers

# print(G.nodes)
print("average_neighbor_degree")
print(nx.average_neighbor_degree(G))
print("degree_assortativity_coefficient")
print(nx.degree_assortativity_coefficient(G))
print("degree_pearson_correlation_coefficient")
print(nx.degree_pearson_correlation_coefficient(G))
# print(nx.k_nearest_neighbors(G))
print("bipartite.closeness_centrality")
print(bipartite.closeness_centrality(G, G.nodes))
print("degree_centrality")
print(nx.degree_centrality(G))
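# A worked reference point for degree_assortativity_coefficient, used above: a star
# graph is maximally disassortative, since its high-degree hub connects only to
# degree-1 leaves, so the coefficient comes out at -1.
import networkx as nx

print(nx.degree_assortativity_coefficient(nx.star_graph(5)))  # ~ -1.0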
import csv

import networkx as nx

m = 8
n = 137
# n x m matrix of zeros; the original np.Array1 = n*[m*[0]] would have aliased
# the same row object n times (and set an attribute on the numpy module)
array1 = [[0] * m for _ in range(n)]

G = nx.Graph()
Gene_name = []
# assumed: same input file as the previous snippet
with open("TF_analysis.csv") as csvfile:
    for row in csv.reader(csvfile):
        G.add_edges_from([(row[0], row[1])], weight=float(row[2]))
        Gene_name.append(row[0])
        # queue = deque([row[0]])
# print(G.nodes)
# print(Gene_name)

# Model of comparing networks
print("average_neighbor_degree")
avg_nbr_deg = nx.average_neighbor_degree(G)  # renamed: 'dict' shadowed the builtin
# print(avg_nbr_deg)

# list for setting aside anomalies (comment translated from Japanese)
put_aside = []
for x in Gene_name:
    y = str(x)
    put_aside.append(avg_nbr_deg[y])

list1 = list(avg_nbr_deg.keys())
list2 = list(avg_nbr_deg.values())
print(nx.triangles(G, 0))  # assumes a node labeled 0 exists in the edge list