def data():
    # NetworkX graph configuration
    G = nx.Graph()
    infile = open('redisdb.log')  # open the file for reading
    for line in infile:  # go through the input file, one line at a time
        line = line.strip()  # remove the newline character at the end of each line
        root, follower = line.split(',')  # split the line around the comma
        G.add_edge(root, follower)
    # Calculate the community partition (modularity) and degree centrality.
    groups = community.best_partition(G)
    degree = cn.degree_centrality(G)
    # Add node attributes for community group and degree centrality.
    nx.set_node_attributes(G, groups, 'group')
    nx.set_node_attributes(G, degree, 'degree')
    # Create a JSON-serializable dict from the NetworkX graph.
    data1 = json_graph.node_link_data(G)
    # Output the JSON file.
    with open('static/data.json', 'w') as output:
        json.dump(data1, output, sort_keys=True, indent=4, separators=(',', ':'))
    return ''
def centrality_analysis(G, isDirected=False):
    '''
    :param G: DiGraph() / Graph()
    :return: several types of centrality for each node
    '''
    nodes = G.nodes()
    if isDirected:
        in_dc = centrality.in_degree_centrality(G)
        out_dc = centrality.out_degree_centrality(G)
        bc = centrality.betweenness_centrality(G)
        ec = centrality.eigenvector_centrality(G)
        cent = {}
        for node in nodes:
            cent[node] = [in_dc[node], out_dc[node], bc[node], ec[node]]
        print("Four types of centrality are calculated:\n"
              "\tin_degree_centrality\n\tout_degree_centrality\n"
              "\tbetweenness_centrality\n\teigenvector_centrality")
        return cent
    else:
        dc = centrality.degree_centrality(G)
        bc = centrality.betweenness_centrality(G)
        ec = centrality.eigenvector_centrality(G)
        cent = {}
        for node in nodes:
            cent[node] = [dc[node], bc[node], ec[node]]
        print("Three types of centrality are calculated:\n"
              "\tdegree_centrality\n\tbetweenness_centrality\n"
              "\teigenvector_centrality")
        return cent
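# A minimal usage sketch for centrality_analysis, assuming networkx is
# installed and its centrality module is imported under the name `centrality`,
# as the function above expects. The karate-club graph is only illustrative.
import networkx as nx
from networkx.algorithms import centrality

G = nx.karate_club_graph()
cent = centrality_analysis(G)  # undirected: degree, betweenness, eigenvector
print(cent[0])                 # [degree, betweenness, eigenvector] for node 0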
def get_centrality(x, edge_index, batch):
    num_graphs = batch[-1] + 1
    N = x.shape[0]
    num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0)
    cum_num_nodes = torch.cat(
        [num_nodes.new_zeros(1), num_nodes.cumsum(dim=0)[:-1]], dim=0)
    cum_num_nodes = torch.cat((cum_num_nodes, torch.tensor([N]).cuda()))
    row, col = edge_index
    c_centrality = []
    d_centrality = []
    for i in range(num_graphs):
        # Process each graph in the batch separately.
        s_id = cum_num_nodes[i]
        e_id = cum_num_nodes[i + 1]
        mask = torch.eq(row, s_id)
        for node in range(s_id + 1, e_id):
            mask = mask + torch.eq(row, node)
        g_row = torch.masked_select(row, mask) - s_id
        g_col = torch.masked_select(col, mask) - s_id
        G = to_networkx(torch.stack([g_row, g_col], dim=0))
        c_centrality = c_centrality + list(closeness_centrality(G).values())
        d_centrality = d_centrality + list(degree_centrality(G).values())
    c_centrality = torch.Tensor(c_centrality).cuda()
    d_centrality = torch.Tensor(d_centrality).cuda()
    return c_centrality, d_centrality
def get_centrality_labels(knn_graph_obj, perc_labeled, type='degree'):
    """Return the top perc_labeled fraction of nodes by the chosen centrality
    (or clustering) score; any other type falls back to random sampling."""
    import random

    measures = {
        'degree': centrality.degree_centrality,
        'closeness': centrality.closeness_centrality,
        'betweenness': centrality.betweenness_centrality,
        'katz': centrality.katz_centrality,
        'clustering': clustering,
    }
    if type in measures:
        scores = pd.DataFrame.from_dict(measures[type](knn_graph_obj),
                                        orient='index', columns=['value'])
        n_labeled = int(perc_labeled * len(scores.index))
        node_toget_labels = scores.sort_values(
            by='value', ascending=False).index[0:n_labeled].tolist()
    else:
        indexes = list(knn_graph_obj.nodes)
        node_toget_labels = random.sample(indexes, int(perc_labeled * len(indexes)))
    return node_toget_labels
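# Hedged usage sketch for get_centrality_labels: the k-NN graph below is a
# stand-in built from random points, and perc_labeled=0.1 asks for the top 10%
# of nodes by degree centrality. Assumes pandas and the
# networkx.algorithms.centrality module are imported under the names the
# function uses.
import numpy as np
import networkx as nx
import pandas as pd
from networkx.algorithms import centrality
from networkx.algorithms.cluster import clustering

rng = np.random.default_rng(0)
points = rng.random((50, 2))
# Build a simple 5-nearest-neighbour graph by Euclidean distance.
knn_graph_obj = nx.Graph()
for i, p in enumerate(points):
    dists = np.linalg.norm(points - p, axis=1)
    for j in np.argsort(dists)[1:6]:
        knn_graph_obj.add_edge(i, int(j))

labeled_nodes = get_centrality_labels(knn_graph_obj, 0.1, type='degree')
print(labeled_nodes)  # ids of the 5 highest-degree-centrality nodes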
def get_topological_features(G, nodes=None):
    N_ = len(G.nodes)
    if nodes is None:
        nodes = G.nodes
    # Degree centrality
    d_c = get_features(degree_centrality(G).values())
    print('degree centrality done')
    # Betweenness centrality
    b_c = get_features(betweenness_centrality(G).values())
    print('betweenness centrality done')
    # Closeness centrality
    c_c = get_features(closeness_centrality(G).values())
    print('closeness centrality done')
    # Clustering coefficient
    c = get_features(clustering(G).values())
    print('clustering done')
    d = diameter(G)
    r = radius(G)
    # Average shortest-path length from each source node.
    s_p_average = []
    for s in shortest_path_length(G):
        lengths = s[1].values()
        s_p_average += [sum(lengths) / float(N_)]
    s_p_average = get_features(s_p_average)
    features = np.concatenate((d_c, b_c, c_c, c, s_p_average, [d], [r]), axis=0)
    return features
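# `get_features` is not shown above; this stand-in is an assumption, not the
# original helper. It reduces a distribution of per-node scores to summary
# statistics, which matches how the results are concatenated into one flat
# feature vector.
import numpy as np

def get_features(values):
    # Summary statistics over an iterable of per-node scores (hypothetical).
    v = np.fromiter(values, dtype=float)
    return np.array([v.min(), v.max(), v.mean(), v.std()])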
def __init__(self, numberOfNode, totalPowerForEachNode):
    # self.graph = nx.powerlaw_cluster_graph(numberOfNode, int(numberOfNode / 10), 0.1)
    self.graph = nx.barabasi_albert_graph(numberOfNode, int(numberOfNode / 20))
    self.currentPower = [0] * numberOfNode
    self.totalPower = [totalPowerForEachNode] * numberOfNode
    self.MeanPower = [0] * numberOfNode
    self.degreeCentralityCoef = list(Centrality.degree_centrality(self.graph).values())
def get_layer_info(subject, journal_volume, edge_list):
    G = nx.Graph()
    G.add_weighted_edges_from(edge_list)
    PATH = "C:/Users/hexie/Documents/APS_result/" + str(journal_volume) + "/" + str(subject)
    try:
        os.mkdir(PATH)
    except FileExistsError:
        pass
    os.chdir(PATH)
    degree_centrality = nxc.degree_centrality(G)
    try:
        eigen_vector_centrality = nxc.eigenvector_centrality(G)
        np.save("eigen_vector_centrality.npy", eigen_vector_centrality)
    except nx.PowerIterationFailedConvergence:
        print("failed to converge within 100 iterations of power iteration")
    closeness_centrality = nxc.closeness_centrality(G)
    betweenness_centrality = nxc.betweenness_centrality(G)
    np.save("degree_centrality.npy", degree_centrality)
    np.save("closeness_centrality.npy", closeness_centrality)
    np.save("betweenness_centrality.npy", betweenness_centrality)
    with open(str(subject) + str(journal_volume) + ".txt", 'w') as f:
        f.write('Number of Edges: ' + str(nx.number_of_edges(G)) + "\n")
        f.write('Number of Nodes: ' + str(nx.number_of_nodes(G)) + "\n")
    nx.draw(G)
    plt.savefig(str(subject) + str(journal_volume) + ".png")
    plt.clf()
def get_centrality_labels(knn_graph_obj, type='degree'):
    if type == 'degree':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.degree_centrality(knn_graph_obj),
            orient='index', columns=['value'])
    elif type == 'closeness':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.closeness_centrality(knn_graph_obj),
            orient='index', columns=['value'])
    elif type == 'betweenness':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.betweenness_centrality(knn_graph_obj),
            orient='index', columns=['value'])
    elif type == 'clustering':
        node_toget_labels = pd.DataFrame.from_dict(
            clustering(knn_graph_obj),
            orient='index', columns=['value'])
    else:
        node_toget_labels = list(knn_graph_obj.nodes)
    return node_toget_labels
def findingCentrality(self, numberOfCentralityNode):
    degreeCentralityArray = list(Centrality.degree_centrality(self.graph).values())
    sortedDegreeCentralityArray = sorted(degreeCentralityArray)
    indexOfSortedDegreeCentralityArray = sorted(
        range(len(degreeCentralityArray)), key=lambda x: degreeCentralityArray[x])
    output = []
    for i in range(numberOfCentralityNode):
        # Walk the sorted arrays from the back to get the k largest values.
        output.append([indexOfSortedDegreeCentralityArray[-i - 1],
                       sortedDegreeCentralityArray[-i - 1]])
    return output
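# An alternative sketch (not the original method): the same top-k lookup can
# be written directly with heapq.nlargest over the centrality dict, assuming
# node ids are the dict keys (indices, as with barabasi_albert_graph above).
import heapq
from networkx.algorithms import centrality as Centrality

def finding_centrality_nlargest(graph, k):
    scores = Centrality.degree_centrality(graph)
    # Returns [[node, score], ...] for the k highest-centrality nodes.
    return [[n, s] for n, s in heapq.nlargest(k, scores.items(),
                                              key=lambda kv: kv[1])]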
def extract_degree_centrality(self):
    print('Calculating degree centrality')
    nodes = centrality.degree_centrality(self.G)
    # Use a context manager so the output file is always closed.
    with open('output/' + self.set_ + '/' + self.set_ + '_degree_centrality.csv',
              'w') as output:
        for key in nodes:
            output.write(str(key) + ',' + str(nodes[key]) + '\n')
def write_highest_degree_cent(temp, file_degree_centr, stop):
    dc_high = sort_dictionary_by_value_desc(central.degree_centrality(temp))
    dc_high_count = Counter(dc_high)
    writer = csv.writer(file_degree_centr, delimiter=';')
    row = [stop.date()]
    for k, v in dc_high_count.most_common(5):
        row.append('%s: %f' % (k.replace(',', ''), v))
    writer.writerow(row)
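# Note on the pattern above: collections.Counter accepts any mapping, so
# Counter(dc_high) keeps the float centralities as "counts" and
# most_common(5) returns the five highest-centrality nodes. A minimal check:
from collections import Counter

scores = {'a': 0.5, 'b': 1.0, 'c': 0.25}
print(Counter(scores).most_common(2))  # [('b', 1.0), ('a', 0.5)]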
def get_degree_centrality(dataset):
    centrality = []
    for data in dataset:
        # Process each graph in the dataset.
        g_row, g_col = data.edge_index
        G = to_networkx(torch.stack([g_row, g_col], dim=0))
        c_centrality = list(degree_centrality(G).values())
        centrality = centrality + c_centrality
    return centrality
def compute_metrics(graph):
    G = json_graph.node_link_graph(graph, multigraph=False)
    degree_centrality = centrality.degree_centrality(G)
    closeness_centrality = centrality.closeness_centrality(G)
    betweenness_centrality = centrality.betweenness_centrality(G)
    page_rank = link_analysis.pagerank_alg.pagerank(G)
    max_clique = approximation.clique.max_clique(G)
    # connected_component_subgraphs was removed in networkx 2.4; this code
    # assumes an older networkx (or an equivalent helper).
    diameters = [distance_measures.diameter(g)
                 for g in connected_component_subgraphs(G)]
    copy = dict()
    copy['id'] = graph['id']
    copy['name'] = graph['name']
    copy['graph'] = dict()
    copy['graph']['nodes'] = graph['nodes']
    copy['graph']['links'] = graph['links']
    copy['metrics'] = dict()
    # Diameters
    copy['metrics']['diameter'] = dict()
    copy['metrics']['diameter']['all'] = diameters
    copy['metrics']['diameter']['max'] = max(diameters)
    copy['metrics']['diameter']['average'] = float(sum(diameters)) / float(len(diameters))
    # Clique size
    copy['metrics']['maxClique'] = len(list(max_clique))
    # Degree centrality
    copy['metrics']['degreeCentrality'] = dict()
    copy['metrics']['degreeCentrality']['byId'] = degree_centrality
    copy['metrics']['degreeCentrality']['max'] = max(degree_centrality.values())
    copy['metrics']['degreeCentrality']['average'] = \
        float(sum(degree_centrality.values())) / float(len(degree_centrality))
    # Closeness centrality
    copy['metrics']['closenessCentrality'] = dict()
    copy['metrics']['closenessCentrality']['byId'] = closeness_centrality
    copy['metrics']['closenessCentrality']['max'] = max(closeness_centrality.values())
    copy['metrics']['closenessCentrality']['average'] = \
        float(sum(closeness_centrality.values())) / float(len(closeness_centrality))
    # Betweenness centrality
    copy['metrics']['betweennessCentrality'] = dict()
    copy['metrics']['betweennessCentrality']['byId'] = betweenness_centrality
    copy['metrics']['betweennessCentrality']['max'] = max(betweenness_centrality.values())
    copy['metrics']['betweennessCentrality']['average'] = \
        float(sum(betweenness_centrality.values())) / float(len(betweenness_centrality))
    # PageRank
    copy['metrics']['pageRank'] = dict()
    copy['metrics']['pageRank']['byId'] = page_rank
    copy['metrics']['pageRank']['max'] = max(page_rank.values())
    copy['metrics']['pageRank']['average'] = \
        float(sum(page_rank.values())) / float(len(page_rank))
    return copy
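# A hedged sketch of the input compute_metrics expects: a node-link dict of
# the kind json_graph.node_link_data produces, plus 'id' and 'name' keys.
# Field values here are made up for illustration, and this assumes a networkx
# version that still provides connected_component_subgraphs (pre-2.4).
sample_graph = {
    'id': 1,
    'name': 'toy',
    'nodes': [{'id': 0}, {'id': 1}, {'id': 2}],
    'links': [{'source': 0, 'target': 1}, {'source': 1, 'target': 2}],
}
metrics = compute_metrics(sample_graph)
print(metrics['metrics']['degreeCentrality']['max'])  # 1.0 for the middle node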
def embedding_method(digraph, p_id):
    # Return graph features such as degree, shortest paths, etc.
    # Degree (number of connections)
    indegree = centrality.degree_centrality(digraph)
    # Eigenvector centrality
    node_centrality = centrality.eigenvector_centrality_numpy(digraph)
    # Number of cliques (disabled)
    # clique = nx.algorithms.clique.number_of_cliques(digraph.to_undirected())
    dict2matrix = lambda x: pd.DataFrame.from_dict(x, orient='index').values[p_id]
    concats = list(map(dict2matrix, [indegree, node_centrality]))
    design_matrix = np.concatenate(concats, axis=1)
    return design_matrix
def analyze_graph(graph: 'TopicGraph'):
    users = {}
    for node in graph.nodes():
        if isinstance(node, User):
            ins = [f"{e}" for e in graph.in_edges(node)]
            outs = [f"{e}" for e in graph.out_edges(node)]
            if node.id not in users.keys():
                user = {"NODE": node, "INS": len(ins), "OUTS": len(outs)}
                users[node.id] = user
            else:
                users[node.id]["INS"] += len(ins)
                users[node.id]["OUTS"] += len(outs)
    centrality = degree_centrality(graph)
    user_stats = []
    for u in [(k, v) for k, v in sorted(users.items(),
                                        key=lambda item: item[1]["INS"],
                                        reverse=True)]:
        user_posts = get_user_posts(graph, u[1]["NODE"])
        sentiment = average_user_sentiment(user_posts)
        u[1]["SENTIMENT"] = sentiment
        u[1]["NUM_POSTS"] = len(user_posts)
        u[1]["AVG_POST_LEN"] = sum([len(p.text) for p in user_posts]) / len(user_posts)
        u[1]["NUM_TOPICS"] = len(get_user_topics(graph, u[1]["NODE"]))
        for k in centrality.keys():
            if f"{k}" == f"{u[1]['NODE'].id}":
                u[1]["CENTRALITY_SCORE"] = centrality[k]
        user_stats.append(u)
    num_in_edges = float(sum(i[1]["INS"] for i in user_stats))
    """Please don't mind the mess that follows...
    pay attention to the other, prettier code... 😅"""
    for u in user_stats:
        u[1]["EDGE_SCORE"] = float(u[1]["INS"]) / num_in_edges
        u[1]["TOPIC_SCORE"] = float(u[1]["NUM_TOPICS"]) / len(get_all_posts(graph))
        u[1]["AVG_POST_LEN_SCORE"] = float(u[1]["AVG_POST_LEN"]) / sum(
            [l[1]["AVG_POST_LEN"] for l in user_stats])
    for u in user_stats:
        _u = u[1]
        influence = avg([_u[score] for score in _u.keys() if "SCORE" in score])
        # weight = float(_u["NUM_POSTS"]) / len(get_all_posts(graph))
        _u["RAW_INFLUENCE"] = influence
    inorm = sum(i[1]["RAW_INFLUENCE"] for i in user_stats)
    for u in user_stats:
        _u = u[1]
        _u["INFLUENCE"] = float(_u["RAW_INFLUENCE"]) / inorm
    for i, u in enumerate(sorted(user_stats,
                                 key=lambda item: item[1]["INFLUENCE"],
                                 reverse=True)):
        _u = u[1]
        _u["RANK"] = i
    return user_stats
def calc_node_based_centrality(edge_index, centrality='degree'):
    adj_list = edge_index.numpy().T
    G = nx.Graph()
    G.add_edges_from(adj_list)
    if centrality == 'degree':
        nodes_centrality = degree_centrality(G)
    elif centrality == 'eigenvector':
        nodes_centrality = eigenvector_centrality(G)
    elif centrality == 'closeness':
        nodes_centrality = closeness_centrality(G)
    else:
        raise ValueError(f"{centrality} is not defined")
    # Score each edge by the product of its endpoints' centralities.
    edges_centrality = dict()
    for u, v in adj_list:
        edges_centrality[(u, v)] = nodes_centrality[u] * nodes_centrality[v]
    return edges_centrality
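# Hedged usage sketch for calc_node_based_centrality: edge_index follows the
# PyTorch Geometric convention of a [2, num_edges] tensor. The tiny triangle
# below is only illustrative; networkx's degree_centrality gives every node
# 2 / (3 - 1) = 1.0 there, so every edge scores 1.0 * 1.0 = 1.0.
import torch

edge_index = torch.tensor([[0, 1, 2],
                           [1, 2, 0]])
edge_scores = calc_node_based_centrality(edge_index, centrality='degree')
print(edge_scores)  # each of the three edges maps to 1.0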
def top_nodes(G, k=3):
    """
    Returns the top k nodes for various centrality measures: degree,
    betweenness, and closeness.

    Args:
        G (nx.Graph): graph for which the top nodes must be determined.
        k (int): number of top nodes to return. If negative, all nodes
            are returned.

    Returns:
        res_dict (dict): dictionary mapping each centrality measure to a
            tuple of the top k nodes for that measure.
    """
    # Fraction of the other nodes each node is connected to.
    node_deg_dict = centrality.degree_centrality(G)
    # Fraction of all-pairs shortest paths that pass through each node.
    node_btw_dict = centrality.betweenness_centrality(G)
    # Reciprocal of the average shortest-path distance to each node.
    node_clo_dict = centrality.closeness_centrality(G)
    # Sort nodes by each centrality measure in decreasing order.
    top_k_deg_nodes = sorted(node_deg_dict.items(), key=lambda x: -x[1])
    top_k_btw_nodes = sorted(node_btw_dict.items(), key=lambda x: -x[1])
    top_k_clo_nodes = sorted(node_clo_dict.items(), key=lambda x: -x[1])
    # Pick the top k nodes.
    res_dict = dict()
    if k > 0:
        res_dict["degree"] = list(zip(*top_k_deg_nodes[:k]))[0]
        res_dict["betweenness"] = list(zip(*top_k_btw_nodes[:k]))[0]
        res_dict["closeness"] = list(zip(*top_k_clo_nodes[:k]))[0]
    else:
        res_dict["degree"] = list(zip(*top_k_deg_nodes))[0]
        res_dict["betweenness"] = list(zip(*top_k_btw_nodes))[0]
        res_dict["closeness"] = list(zip(*top_k_clo_nodes))[0]
    return res_dict
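# A quick usage sketch for top_nodes on a well-known graph; on Zachary's
# karate club, nodes 33 and 0 (the club officer and the instructor) have the
# highest degrees, so they lead the degree ranking.
import networkx as nx

G = nx.karate_club_graph()
print(top_nodes(G, k=3)['degree'])  # (33, 0, 32)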
def parse(name):
    print(name)
    pathbase = path.abspath(path.dirname(__file__))
    G = nx.Graph()
    data = json.load(open('{0}/{1}.json'.format(pathbase, name)))
    nodes = data['nodes']
    text = {i: node['text'] for i, node in enumerate(nodes)}
    weight = {i: float(node['weight']) for i, node in enumerate(nodes)}
    for i in range(len(nodes)):
        G.add_node(i)
    for link in data['links']:
        G.add_edge(link['source'], link['target'])
    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    # edge_betweenness = centrality.edge_betweenness_centrality(G)
    # current_flow_closeness = centrality.current_flow_closeness_centrality(G)
    # current_flow_betweenness = \
    #     centrality.current_flow_betweenness_centrality(G)
    try:
        eigenvector = centrality.eigenvector_centrality(G, max_iter=1000)
    except nx.NetworkXException:
        # Fall back to zeros if the power iteration fails to converge.
        eigenvector = {i: 0 for i in range(len(nodes))}
    katz = centrality.katz_centrality(G)
    obj = {'nodes': [], 'links': data['links']}
    for i in range(len(nodes)):
        obj['nodes'].append({
            'text': text[i],
            'weight': weight[i],
            'degree': degree[i],
            'closeness': closeness[i],
            'betweenness': betweenness[i],
            # 'edge_betweenness': edge_betweenness[i],
            # 'current_flow_closeness': current_flow_closeness[i],
            # 'current_flow_betweenness': current_flow_betweenness[i],
            'eigenvector': eigenvector[i],
            'katz': katz[i],
        })
    json.dump(obj, open('{0}/../data/{1}.json'.format(pathbase, name), 'w'),
              sort_keys=True)
def get_central_nodes(nodes, parts, full_network, num_nodes, page_rank):
    central_flags = [0] * len(parts)
    for part_id in set(parts):
        part_nodes = []
        pr_centrality = {}
        for i, node in enumerate(nodes):
            if parts[i] == part_id:
                part_nodes.append(node)
                if len(page_rank) > 0:
                    pr_centrality[node] = page_rank[node]
        if len(page_rank) == 0:
            # No PageRank given: rank by degree centrality within the part.
            sub_graph = full_network.subgraph(part_nodes)
            centrality = degree_centrality(sub_graph)
            centrality_sorted = sorted(centrality, key=centrality.get,
                                       reverse=True)
        else:
            centrality_sorted = sorted(pr_centrality, key=pr_centrality.get,
                                       reverse=True)
        for central_node in centrality_sorted[0:num_nodes]:
            central_flags[nodes.index(central_node)] = 1
    return central_flags
def degree_centrality(graph):
    """Return the degree centrality of every node in `graph` as a list."""
    return list(centrality.degree_centrality(graph).values())
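# Sanity check for the wrapper above: networkx normalizes degree centrality
# by n - 1, so on a 3-node path the middle node scores 1.0 and the endpoints
# score 0.5.
import networkx as nx

print(degree_centrality(nx.path_graph(3)))  # [0.5, 1.0, 0.5]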
def get_degree_centrality(G, **kwargs):
    """Returns a dictionary of degree centrality values for all nodes."""
    # Compute and return the degree centrality.
    return nxc.degree_centrality(G)
# Assign node colors by community membership.
d = list(G.nodes)
g_dict = {}
for i in range(len(c)):
    g_dict[i] = i
c_list = []
for j in range(len(d)):
    for n in range(len(c)):
        if d[j] in c[n]:
            c_list.append(g_dict[n])

# Centrality analysis
# Degree centrality (computed once; keep the dict, its values, and its keys)
cent_central = degree_centrality(G)
cent_values = cent_central.values()
cent_keys = cent_central.keys()
# Closeness centrality
d_central = closeness_centrality(G)
d_values = d_central.values()
d_keys = d_central.keys()
# Betweenness centrality
bet_central = betweenness_centrality(G)
bet_values = bet_central.values()
bet_keys = bet_central.keys()
# sorted(d_dict.values())
d_values_list = list(d_values)
def compute_features(self):
    # Degree centrality
    degree_centrality = lambda graph: list(
        centrality.degree_centrality(graph).values())
    self.add_feature(
        "degree centrality",
        degree_centrality,
        "The degree centrality distribution",
        InterpretabilityScore(5),
        statistics="centrality",
    )

    # Betweenness centrality
    betweenness_centrality = lambda graph: list(
        centrality.betweenness_centrality(graph).values())
    self.add_feature(
        "betweenness centrality",
        betweenness_centrality,
        "Betweenness centrality of a node v is the sum of the fraction of "
        "all-pairs shortest paths that pass through v",
        InterpretabilityScore(5),
        statistics="centrality",
    )

    # Closeness centrality
    closeness_centrality = lambda graph: list(
        centrality.closeness_centrality(graph).values())
    self.add_feature(
        "closeness centrality",
        closeness_centrality,
        "Closeness is the reciprocal of the average shortest path distance",
        InterpretabilityScore(5),
        statistics="centrality",
    )

    # Edge betweenness centrality
    def edge_betweenness_centrality(graph):
        if graph.edges:
            return list(centrality.edge_betweenness_centrality(graph).values())
        return [np.nan]

    self.add_feature(
        "edge betweenness centrality",
        edge_betweenness_centrality,
        "Betweenness centrality of an edge e is the sum of the fraction of "
        "all-pairs shortest paths that pass through e",
        InterpretabilityScore(4),
        statistics="centrality",
    )

    # Harmonic centrality
    harmonic_centrality = lambda graph: list(
        centrality.harmonic_centrality(graph).values())
    self.add_feature(
        "harmonic centrality",
        harmonic_centrality,
        "Harmonic centrality of a node u is the sum of the reciprocal "
        "of the shortest path distances from all other nodes to u",
        InterpretabilityScore(4),
        statistics="centrality",
    )

    # Subgraph centrality
    subgraph_centrality = lambda graph: list(
        centrality.subgraph_centrality(graph).values())
    self.add_feature(
        "subgraph centrality",
        subgraph_centrality,
        "The subgraph centrality for a node is the sum of weighted closed walks "
        "of all lengths starting and ending at that node.",
        InterpretabilityScore(3),
        statistics="centrality",
    )

    # Second order centrality
    second_order_centrality = lambda graph: list(
        centrality.second_order_centrality(utils.ensure_connected(graph)).values())
    self.add_feature(
        "second order centrality",
        second_order_centrality,
        "The second order centrality of a given node is the standard deviation "
        "of the return times to that node of a perpetual random walk on G",
        InterpretabilityScore(4),
        statistics="centrality",
    )

    # Eigenvector centrality
    eigenvector_centrality = lambda graph: list(
        centrality.eigenvector_centrality_numpy(
            utils.ensure_connected(graph)).values())
    self.add_feature(
        "eigenvector centrality",
        eigenvector_centrality,
        "Eigenvector centrality computes the centrality for a node based "
        "on the centrality of its neighbors",
        InterpretabilityScore(4),
        statistics="centrality",
    )

    # Katz centrality
    katz_centrality = lambda graph: list(
        centrality.katz_centrality_numpy(utils.ensure_connected(graph)).values())
    self.add_feature(
        "katz centrality",
        katz_centrality,
        "Generalisation of eigenvector centrality - Katz centrality computes the "
        "centrality for a node based on the centrality of its neighbors",
        InterpretabilityScore(4),
        statistics="centrality",
    )

    # PageRank
    pagerank = lambda graph: list(nx.pagerank_numpy(graph).values())
    self.add_feature(
        "pagerank",
        pagerank,
        "The pagerank computes a ranking of the nodes in the graph based on "
        "the structure of the incoming links.",
        InterpretabilityScore(4),
        statistics="centrality",
    )
def update_properties():
    partition = community.best_partition(network)
    p_, nodes_community = zip(*sorted(partition.items()))
    nodes_source.data['community'] = nodes_community
    nodes_source.data['community_color'] = [community_colors[t % len(community_colors)]
                                            for t in nodes_community]
    centrality = centrality_metrics[select_centrality.value](network, weight='weight')
    _, nodes_centrality = zip(*sorted(centrality.items()))
    nodes_source.data['centrality'] = [7 + 10 * t / max(nodes_centrality)
                                       for t in nodes_centrality]

update_props_button = Button(label="Update Properties")
update_props_button.on_click(update_properties)

# Degree and closeness ignore the weight argument; betweenness uses it.
centrality_metrics = {
    "Degree Centrality": lambda n, weight='_': centrality_algorithms.degree_centrality(n),
    "Closeness Centrality": lambda n, weight='_': centrality_algorithms.closeness_centrality(n),
    "Betweenness Centrality": centrality_algorithms.betweenness_centrality,
}

def update_centrality(attrname, old, new):
    centrality = centrality_metrics[select_centrality.value](network, weight='weight')
    _, nodes_centrality = zip(*sorted(centrality.items()))
    nodes_source.data['centrality'] = [7 + 10 * t / max(nodes_centrality)
                                       for t in nodes_centrality]

select_centrality = Select(title="Centrality Metric:", value="Degree Centrality",
                           options=list(centrality_metrics.keys()))
select_centrality.on_change('value', update_centrality)
print(fname)
try:
    G = read_dot(os.path.join("output", fname))
    nx.draw(G)
except Exception:
    print("cannot load graph")
    continue
if G.number_of_nodes() == 0:
    print("Cannot read binary file")
    continue
data = []
data.append(fname)
data.append(G.number_of_nodes())
data.append(G.number_of_edges())
data.append(density(G))
deg_centrality = degree_centrality(G)
data.extend(properties_of_array(deg_centrality))
cln_centrality = closeness_centrality(G)
data.extend(properties_of_array(cln_centrality))
btn_centrality = betweenness_centrality(G)
data.extend(properties_of_array(btn_centrality))
st_path = shortest_path(G)
deg = [len(val) for key, val in st_path.items()]
d = np.array(deg)
data.extend([np.min(d), np.max(d), np.median(d), np.mean(d), np.std(d)])
try:
corpo_pairs_list = open('./data/corpo_pairs_res.txt').readlines()
G = nx.Graph()
name_index_list = {}
index = 0
for _pair in corpo_pairs_list:
    _, pair_a, _, pair_b, _ = _pair.split(',')
    if pair_a not in name_index_list:
        name_index_list[pair_a] = str(index)
        index += 1
    if pair_b not in name_index_list:
        name_index_list[pair_b] = str(index)
        index += 1
    # print(pair_a + ',' + pair_b)
    G.add_edge(pair_a, pair_b)

# nx.draw(G)
# plt.savefig("path.png")

from networkx.algorithms.centrality import (degree_centrality,
                                            closeness_centrality,
                                            betweenness_centrality,
                                            communicability_betweenness_centrality)

degree_res = degree_centrality(G)
closeness_res = closeness_centrality(G)
# Note: communicability betweenness is used here, not shortest-path betweenness.
betweenness_res = communicability_betweenness_centrality(G)

centrality_out = open('./data/centrality.txt', 'w')
centrality_out.write('pattern,degree,closeness,betweenness')
for key, value in degree_res.items():
    centrality_out.write('\n' + key + '\t %.2f \t %.2f \t %.2f'
                         % (value, closeness_res[key], betweenness_res[key]))
centrality_out.close()
print(degree_res)
print(closeness_res)
print(betweenness_res)
class Create_network():
    # Degree and closeness ignore the weight argument; betweenness uses it.
    centrality_metrics = {
        "Degree Centrality": lambda n, weight='_': centrality_algorithms.degree_centrality(n),
        "Closeness Centrality": lambda n, weight='_': centrality_algorithms.closeness_centrality(n),
        "Betweenness Centrality": centrality_algorithms.betweenness_centrality,
    }
    community_colors = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00',
                        '#ffff33', '#a65628', '#b3cde3', '#ccebc5', '#decbe4',
                        '#fed9a6', '#ffffcc', '#e5d8bd', '#fddaec', '#1b9e77',
                        '#d95f02', '#7570b3', '#e7298a', '#66a61e', '#e6ab02',
                        '#a6761d', '#666666']

    def __init__(self, network_file, layout_file, count_path, title,
                 width=800, thresh_val=8):
        self.network_file = network_file
        self.layout_file = layout_file
        self.count_path = count_path
        self.network_tuple = self.load_network(network_file, layout_file)
        self.nodes_sources_tab1 = self.column_source(self.network_tuple[1], count_path)
        self.network_plots_n_circle_tab1 = self.create_network_plot(
            self.nodes_sources_tab1, title, width)
        self.network_lines_tab1 = self.add_lines(
            self.network_tuple, self.network_plots_n_circle_tab1[0])
        self.get_centrality_n_community(self.network_tuple[0],
                                        self.nodes_sources_tab1,
                                        self.network_plots_n_circle_tab1[1])
        self.drop_button_tab1 = Button(label="Remove Node", button_type="warning")
        self.drop_button_tab1.on_click(self.remove_node_tab1)
        self.remove_unattached_button = Button(label="Remove unattached nodes",
                                               button_type="success")
        self.remove_unattached_button.on_click(self.remove_unbound_nodes)
        self.update_props_button = Button(label="Update Properties",
                                          button_type="warning")
        self.update_props_button.on_click(self.update_properties)
        self.update_layout_button = Button(label="Update Layout",
                                           button_type="success")
        self.update_layout_button.on_click(self.update_layout)
        self.select_centrality = Select(title="Centrality Metric:",
                                        value="Degree Centrality",
                                        options=list(self.centrality_metrics.keys()))
        self.select_centrality.on_change('value', self.update_centrality)
        self.slider = Slider(start=0, end=10, value=0, step=1, title="Threshold %")
        self.slider.on_change('value', self.filter_threshold)
        self.slider.value = thresh_val
        # self.filter_threshold('', 0, 3)

    def reinit(self, network_file, layout_file, count_path, title):
        lines_source = self.network_lines_tab1
        nodes_source = self.nodes_sources_tab1
        self.network_file = network_file
        self.layout_file = layout_file
        self.count_path = count_path
        self.network_plots_n_circle_tab1[0].title.text = title
        self.network_tuple = self.load_network(network_file, layout_file)
        network, layout = self.network_tuple
        print('loaded new network')
        nodes, nodes_coordinates = zip(*sorted(layout.items()))
        count_dict = dict(pickle.load(open(self.count_path, 'rb')))
        nodes_xs, nodes_ys = list(zip(*nodes_coordinates))
        node_occurances = [count_dict[node] for node in nodes]
        nodes_source.data['x'] = nodes_xs
        nodes_source.data['y'] = nodes_ys
        nodes_source.data['name'] = nodes
        nodes_source.data['counts'] = node_occurances
        lines_source.data = self.get_edges_specs(network, layout)
        self.update_properties()
        self.slider.value = 8
        self.filter_threshold('', 0, 8)

    def load_network(self, network_file, layout_file):
        network = pickle.load(open(network_file, 'rb'))
        layout = pickle.load(open(layout_file, 'rb'))
        return (network, layout)

    def column_source(self, layout, count_path):
        nodes, nodes_coordinates = zip(*sorted(layout.items()))
        count_dict = dict(pickle.load(open(count_path, 'rb')))
        nodes_xs, nodes_ys = list(zip(*nodes_coordinates))
        node_occurances = [count_dict[node] for node in nodes]
        nodes_source = ColumnDataSource(
            dict(x=nodes_xs, y=nodes_ys, name=nodes, counts=node_occurances))
        return nodes_source

    def create_network_plot(self, nodes_source, title='', width=800):
        plot = figure(plot_width=width, plot_height=700,
                      tools=['tap', 'box_zoom', 'reset', 'pan', 'wheel_zoom'],
                      title=title)
        plot.title.text_font = "helvetica"
        plot.title.text_font_style = "bold"
        plot.title.text_font_size = "20px"
        plot.background_fill_color = "beige"
        plot.background_fill_alpha = 0.2
        g1 = Circle(x='x', y='y', size=2, fill_color='blue')
        g1_r = plot.add_glyph(source_or_glyph=nodes_source, glyph=g1)
        g1_hover = HoverTool(renderers=[g1_r],
                             tooltips=[('name', '@name'), ('count', '@counts')])
        glyph_text = Text(x="x", y="y", text="name", text_color="#ff4a4a",
                          text_font_size='6pt', text_alpha=0.7)
        plot.add_glyph(nodes_source, glyph_text)
        plot.add_tools(g1_hover)
        plot.grid.grid_line_color = None
        plot.axis.visible = False
        return plot, g1_r, glyph_text

    def get_edges_specs(self, _network, _layout):
        d = dict(xs=[], ys=[], alphas=[])
        weights = [d['weight'] for u, v, d in _network.edges(data=True)]
        max_weight = max(weights)
        calc_alpha = lambda h: 0.1 + 0.6 * (h / max_weight)
        for u, v, data in _network.edges(data=True):
            d['xs'].append([_layout[u][0], _layout[v][0]])
            d['ys'].append([_layout[u][1], _layout[v][1]])
            d['alphas'].append(calc_alpha(data['weight']))
        return d

    def add_lines(self, network_tuple, plot):
        lines_source = ColumnDataSource(self.get_edges_specs(*network_tuple))
        r_lines = plot.multi_line('xs', 'ys', line_width=2, alpha='alphas',
                                  color='navy', source=lines_source)
        return lines_source

    def get_centrality_n_community(self, network, nodes_source, g1_r):
        community_colors = self.community_colors
        centrality = networkx.algorithms.centrality.degree_centrality(network)
        # The first element of each pair is the node id again.
        _, nodes_centrality = zip(*sorted(centrality.items()))
        nodes_source.add(
            [7 + 10 * t / max(nodes_centrality) for t in nodes_centrality],
            'centrality')
        partition = community.best_partition(network)
        p_, nodes_community = zip(*sorted(partition.items()))
        nodes_source.add(nodes_community, 'community')
        nodes_source.add([community_colors[t % len(community_colors)]
                          for t in nodes_community], 'community_color')
        g1_r.glyph.size = 'centrality'
        g1_r.glyph.fill_color = 'community_color'

    def remove_node_1_net(self, nodes_source, lines_source, network, layout):
        if not nodes_source.selected['1d']['indices']:
            return
        idx = nodes_source.selected['1d']['indices'][0]
        # Update the networkx network object.
        node = nodes_source.data['name'][idx]
        network.remove_node(node)
        # Update the layout.
        layout.pop(node)
        # Update the nodes ColumnDataSource.
        new_source_data = dict()
        for col in nodes_source.column_names:
            new_source_data[col] = [e for i, e in enumerate(nodes_source.data[col])
                                    if i != idx]
        nodes_source.data = new_source_data
        # Update the lines ColumnDataSource.
        lines_source.data = self.get_edges_specs(network, layout)

    def remove_node_tab1(self):
        self.remove_node_1_net(self.nodes_sources_tab1, self.network_lines_tab1,
                               *self.network_tuple)

    def remove_unbound_nodes(self):
        network, layout = self.network_tuple
        lines_source = self.network_lines_tab1
        nodes_source = self.nodes_sources_tab1
        unbound_nodes = []
        for node in network.nodes():
            if not network.edges(node):
                unbound_nodes.append(node)
        for node in unbound_nodes:
            network.remove_node(node)
            layout.pop(node)
        nodes, nodes_coordinates = zip(*sorted(layout.items()))
        count_dict = dict(pickle.load(open(self.count_path, 'rb')))
        nodes_xs, nodes_ys = list(zip(*nodes_coordinates))
        node_occurances = [count_dict[node] for node in nodes]
        nodes_source.data['x'] = nodes_xs
        nodes_source.data['y'] = nodes_ys
        nodes_source.data['name'] = nodes
        nodes_source.data['counts'] = node_occurances
        self.update_properties()
        lines_source.data = self.get_edges_specs(network, layout)

    def update_properties(self):
        community_colors = self.community_colors
        network, layout = self.network_tuple
        nodes_source = self.nodes_sources_tab1
        partition = community.best_partition(network)
        p_, nodes_community = zip(*sorted(partition.items()))
        nodes_source.data['community'] = nodes_community
        nodes_source.data['community_color'] = [
            community_colors[t % len(community_colors)] for t in nodes_community
        ]
        centrality = self.centrality_metrics[self.select_centrality.value](
            network, weight='weight')
        _, nodes_centrality = zip(*sorted(centrality.items()))
        nodes_source.data['centrality'] = [
            7 + 10 * t / max(nodes_centrality) for t in nodes_centrality
        ]

    def update_centrality(self, attrname, old, new):
        network, _ = self.network_tuple
        nodes_source = self.nodes_sources_tab1
        centrality = self.centrality_metrics[self.select_centrality.value](
            network, weight='weight')
        _, nodes_centrality = zip(*sorted(centrality.items()))
        nodes_source.data['centrality'] = [
            7 + 10 * t / max(nodes_centrality) for t in nodes_centrality
        ]

    def update_layout(self):
        network, layout = self.network_tuple
        lines_source = self.network_lines_tab1
        nodes_source = self.nodes_sources_tab1
        new_layout = networkx.spring_layout(
            network, k=1.1 / sqrt(network.number_of_nodes()), iterations=100)
        layout = new_layout
        nodes, nodes_coordinates = zip(*sorted(layout.items()))
        nodes_xs, nodes_ys = list(zip(*nodes_coordinates))
        nodes_source.data['x'] = nodes_xs
        nodes_source.data['y'] = nodes_ys
        lines_source.data = self.get_edges_specs(network, layout)

    def filter_threshold(self, attrname, old, new):
        network, layout = self.network_tuple
        if old == new:
            return
        if old > new:
            # Lowering the threshold: reload the full network first.
            self.network_tuple = self.load_network(self.network_file,
                                                   self.layout_file)
            network, layout = self.network_tuple
        weights = [d['weight'] for u, v, d in network.edges(data=True)]
        max_weight = max(weights)
        min_weight = min(weights)
        threshold = new * (max_weight - min_weight) / 100.0
        to_remove_list = []
        sources_in = set()
        for (u, v, d) in network.edges(data='weight'):
            if d < threshold:
                if ((u, v, d) in sources_in) or ((v, u, d) in sources_in):
                    continue
                to_remove_list.append((u, v))
                sources_in.add((u, v, d))
        network.remove_edges_from(to_remove_list)
        self.remove_unbound_nodes()
        font_size = min(10, 6 + new)
        self.network_plots_n_circle_tab1[2].text_font_size = '{}pt'.format(font_size)
        self.update_layout()

    def return_view(self):
        return column(
            self.network_plots_n_circle_tab1[0],
            row(widgetbox(self.slider, self.select_centrality),
                widgetbox(self.drop_button_tab1, self.remove_unattached_button),
                widgetbox(self.update_props_button, self.update_layout_button)))
def worker(nproc):
    def _print(*args, **kwargs):
        # Avoid printing the same stuff multiple times.
        if nproc == 0:
            print(*args, **kwargs)

    def _regular_iterator(ls):
        for l in ls:
            yield l

    iterator = tqdm if nproc == 0 else _regular_iterator
    graph = nx.MultiDiGraph() if DIRECTIONAL_GRAPH else nx.MultiGraph()
    possible_targets = {}
    positive_train_triples = []
    train_lines = count_file_lines(PATH_TRAIN)
    test_lines = count_file_lines(PATH_TEST)
    # Start and end ranges for the triples that this thread will process.
    start_range_train = int(nproc * train_lines / N_THREADS)
    end_range_train = int((nproc + 1) * train_lines / N_THREADS)
    start_range_test = int(nproc * test_lines / N_THREADS)
    end_range_test = int((nproc + 1) * test_lines / N_THREADS)
    rels_to_study = None
    rels_study_path = f"datasets/{DATASET}/relations_to_study.txt"
    if isfile(rels_study_path):
        rels_to_study = []
        with open(rels_study_path, "r") as f:
            for line in f:
                if line:
                    rels_to_study.append(line.strip().split("\t")[0])

    # Load the data from the training split.
    _print("Loading training data")
    with open(PATH_TRAIN, "r") as f:
        for i, line in enumerate(f):
            spl = line.strip().split("\t")
            # Skip negative examples in the training split, since we generate
            # our own negatives.
            if len(spl) >= 4 and spl[3] != "1":
                continue
            s, r, t = spl[:3]
            if r not in possible_targets:
                possible_targets[r] = []
            possible_targets[r].append(t)
            graph.add_edge(s, t, rel=r, key=r)
            if start_range_train <= i < end_range_train and (
                    rels_to_study is None or r in rels_to_study):
                positive_train_triples.append((s, r, t))

    _print("Removing duplicate targets")
    # Remove duplicates from the possible-targets dict.
    for r, ls in possible_targets.items():
        possible_targets[r] = list(set(ls))

    with open(PATH_RELS, "r") as f:
        relations = [x.strip().split("\t")[0] for x in f.readlines()]

    # Generate the negatives by replacing the target entity with a random one
    # from the same range.
    _print("Generating negatives")
    negative_train_triples = generate_negatives(positive_train_triples,
                                                possible_targets)
    labelled_triples_train = [((s, r, t, 1), None)
                              for s, r, t in positive_train_triples
                              ] + negative_train_triples

    _print("Computing features for the training split")
    training_csv = open(f"output/{DATASET}/train.csv.{nproc}", "a")
    centrality_indices = degree_centrality(graph)
    if not rels_to_study:
        rels_to_study = relations
    t1 = time.thread_time()
    for (s, r, t, label), orig in iterator(labelled_triples_train):
        fvec = get_feature_vector(graph, (s, r, t), relations, bool(label), orig,
                                  centrality_indices=centrality_indices,
                                  rels_to_study=rels_to_study)
        training_csv.write(
            f"{s},{r},{t};{label};{';'.join(str(x) for x in fvec)}\n")
    t2 = time.thread_time()
    training_csv.close()

    _print("Loading testing data")
    labelled_triples_test = []
    with open(PATH_TEST, "r") as f:
        for i, line in enumerate(f):
            if start_range_test <= i < end_range_test:
                spl = line.strip().split("\t")
                s, r, t, lbl = spl[:4]
                if rels_to_study is None or r in rels_to_study:
                    labelled_triples_test.append((s, r, t, 1 if lbl == "1" else 0))

    _print("Computing features for the testing split")
    testing_csv = open(f"output/{DATASET}/test.csv.{nproc}", "a")
    t3 = time.thread_time()
    for s, r, t, label in iterator(labelled_triples_test):
        try:
            fvec = get_feature_vector(graph, (s, r, t), relations,
                                      centrality_indices=centrality_indices,
                                      rels_to_study=rels_to_study)
        except NodeNotFound:
            # Since the testing data does not appear in the training split,
            # an entity present in the testing split may not appear in the
            # graph generated by the training split.
            continue
        testing_csv.write(
            f"{s},{r},{t};{label};{';'.join(str(x) for x in fvec)}\n")
    t4 = time.thread_time()
    testing_csv.close()
    elapsed_seconds = (t2 - t1) + (t4 - t3)
    with open("compute_times.txt", "a") as f:
        f.write(f"{DATASET};c{MAX_CONTEXT_SIZE};thread{nproc};{elapsed_seconds}\n")
def fit(self, X_df, y_array):
    d = {'link': np.array(y_array)}
    y_array = pd.DataFrame(data=d)
    path = os.path.dirname(__file__)
    self.data = pd.read_csv(os.path.join(path, 'nodes_info_new.csv'),
                            low_memory=False)

    def clean_date(s):
        s = re.sub('[^0-9]', '', str(s))
        if len(s) == 0:
            return np.nan
        if s == '1':
            return np.nan
        if len(s) < 4:
            date = int(s)
        else:
            date = int(s[:4])
        if date > 2000:
            date = int(s[:3])
        return date

    self.data['birth_date'] = self.data['birth_date'].apply(clean_date)
    self.data['death_date'] = self.data['death_date'].apply(clean_date)

    def get_country(s):
        s = re.sub('[^a-zA-Z ]', '', str(s))
        if len(s) == 0:
            return np.nan
        return s.split()[-1]

    self.data['birth_place'] = self.data['birth_place'].apply(get_country)
    self.data['death_place'] = self.data['death_place'].apply(get_country)

    # A dictionary with the information for each thinker, keyed by name.
    self.thinker_dictionary = {}
    for i, row in self.data.iterrows():
        self.thinker_dictionary[row['thinker']] = {
            'thinker_id': row['id'],
            'birth_date': row['birth_date'],
            'birth_place': row['birth_place'],
            'death_place': row['death_place'],
            'death_date': row['death_date'],
            'summary': row['summary'],
        }

    max_id = self.data['id'].max()
    self.nodes = np.arange(1, max_id + 1)
    self.edges = np.array(
        [[self.thinker_dictionary[row['thinker_1']]['thinker_id'],
          self.thinker_dictionary[row['thinker_2']]['thinker_id']]
         for i, row in (X_df[(y_array['link'] == 1).values.flatten()]).iterrows()])
    self.G.add_nodes_from(self.nodes)
    self.G.add_edges_from(self.edges)

    self.graph_features = pd.DataFrame({'thinker_id': self.nodes})
    self.connected_comp = list(nx.connected_components(self.G))
    group_id = {}
    group_len = {}
    for think_id in self.nodes:
        for i, group in enumerate(self.connected_comp):
            if think_id in group:
                group_id[think_id] = i
                group_len[think_id] = len(group)
                break
    self.graph_features['connected_comp'] = [group_id[think_id]
                                             for think_id in self.nodes]
    self.graph_features['connected_comp_len'] = [group_len[think_id]
                                                 for think_id in self.nodes]
    self.graph_features['degree_centrality'] = degree_centrality(self.G).values()
    self.graph_features['degree_centrality'] /= self.graph_features['degree_centrality'].max()
    self.graph_features['eigenvector_centrality'] = eigenvector_centrality(self.G).values()
    self.graph_features['eigenvector_centrality'] /= self.graph_features['eigenvector_centrality'].max()
    # self.graph_features['closeness_centrality'] = closeness_centrality(self.G).values()
    # self.graph_features['closeness_centrality'] /= self.graph_features['closeness_centrality'].max()
    # self.graph_features['betweenness_centrality'] = betweenness_centrality(self.G).values()
    # self.graph_features['betweenness_centrality'] /= self.graph_features['betweenness_centrality'].max()
    # self.graph_features['subgraph_centrality'] = subgraph_centrality(self.G).values()
    # self.graph_features['subgraph_centrality'] /= self.graph_features['subgraph_centrality'].max()
    self.graph_features['pagerank'] = nx.pagerank(self.G, alpha=0.9).values()
    self.graph_features['pagerank'] /= self.graph_features['pagerank'].max()
    return self
def data():
    # Read the configuration file.
    try:
        namafile = 'config.ini'
        config = configparser.ConfigParser()
        config.read_file(open(namafile))
        rurl = config.get('redis', 'REDIS_URL')
        rport = config.get('redis', 'REDIS_PORT')
        rpass = config.get('redis', 'REDIS_PASS')
    except KeyError:
        sys.stderr.write("Cannot open file " + namafile + "\n")
        sys.exit(1)

    # NetworkX graph configuration.
    G = nx.Graph()
    # Redis connection configuration.
    r = redis.Redis(host=rurl, port=rport, db=0, password=rpass)
    # Iterate over all redis keys.
    for k in r.keys('*'):
        # Get the value for this redis key.
        value = str(r.get(k))
        # Strip the bytes-literal prefix (b') and trailing quote from the value.
        panjang = len(value)
        value = value[2:(panjang - 1)]
        # Split the value on commas.
        arrvalue = value.split(',')
        # Convert the key to a string.
        root = str(k)
        # Strip the bytes-literal prefix (b') and trailing quote from the key.
        panjangkey = len(root)
        root = root[2:(panjangkey - 1)]
        for follower in arrvalue:
            # Create an edge from the key to each of its values.
            G.add_edge(root, follower)
    # Calculate the community partition (modularity) and degree centrality.
    groups = community.best_partition(G)
    degree = cn.degree_centrality(G)
    # Add node attributes for community group and degree centrality.
    nx.set_node_attributes(G, groups, 'group')
    nx.set_node_attributes(G, degree, 'degree')
    # Create a JSON-serializable dict from the NetworkX graph.
    data1 = json_graph.node_link_data(G)
    # Output the JSON file.
    with open('static/data.json', 'w') as output:
        json.dump(data1, output, sort_keys=True, indent=4, separators=(',', ':'))
    return data1
def calculate_all_centralities(data):
    """
    Calculates all four centrality metrics for the input graph.

    Parameters:
        data: a json object which represents the graph. This json is
            manipulated and the necessary metrics are added to it.
    """
    G = json_graph.node_link_graph(data)  # loads the data into a NetworkX graph object
    # Calculate three of the metrics.
    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    eigenvector_fail = False
    try:
        # Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G), max_iter=100000)
    except NetworkXError:
        # Eigenvector values will be None if the calculation fails.
        eigenvector = []
        eigenvector_fail = True
        print("Max iterations exceeded")
    degree_max = -1.0
    closeness_max = -1.0
    betweenness_max = -1.0
    eigenvector_max = -1.0
    # Add the unnormalized values to the json.
    for author in data['nodes']:
        i = author['id']
        author['degreeCentralityUnnormalized'] = degree[i]
        author['closenessCentralityUnnormalized'] = closeness[i]
        author['betweennessCentralityUnnormalized'] = betweenness[i]
        author['eigenvectorCentralityUnnormalized'] = \
            eigenvector[i] if not eigenvector_fail else 1.0
    # Find the highest value for each centrality type.
    for i in degree:
        if degree[i] > degree_max:
            degree_max = degree[i]
    for i in closeness:
        if closeness[i] > closeness_max:
            closeness_max = closeness[i]
    for i in betweenness:
        if betweenness[i] > betweenness_max:
            betweenness_max = betweenness[i]
    for i in eigenvector:
        if eigenvector[i] > eigenvector_max:
            eigenvector_max = eigenvector[i]
    # Normalize the values.
    for i in degree:
        if degree[i] != 0:
            degree[i] = degree[i] / degree_max
    for i in closeness:
        if closeness[i] != 0:
            closeness[i] = closeness[i] / closeness_max
    for i in betweenness:
        if betweenness[i] != 0:
            betweenness[i] = betweenness[i] / betweenness_max
    for i in eigenvector:
        if eigenvector[i] != 0:
            eigenvector[i] = eigenvector[i] / eigenvector_max
    # Add the normalized values to the json.
    for author in data['nodes']:
        i = author['id']
        author['degreeCentrality'] = degree[i]
        author['closenessCentrality'] = closeness[i]
        author['betweennessCentrality'] = betweenness[i]
        author['eigenvectorCentrality'] = \
            eigenvector[i] if not eigenvector_fail else 1.0
    return data
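# Hedged usage sketch for calculate_all_centralities (after the Python 3
# fixes above): the node-link dict mirrors what json_graph.node_link_data
# emits, with string ids chosen purely for illustration.
data = {
    'nodes': [{'id': 'a'}, {'id': 'b'}, {'id': 'c'}],
    'links': [{'source': 'a', 'target': 'b'}, {'source': 'b', 'target': 'c'}],
}
result = calculate_all_centralities(data)
print(result['nodes'][1]['degreeCentrality'])  # 1.0: 'b' has the max degree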
def central_it(self):
    self.central = centrality.degree_centrality(pg.graph)
edges = [r.split(',')[:2] for r in rows[1:]]
weights = [r.split(',')[-1] for r in rows[1:]]
edge_tuples = [(e[0], e[1], int(weights[i])) for i, e in enumerate(edges)]

# Only keep edges between the selected nodes from the node csv.
edges = []
for e in edge_tuples:
    if all(x in list(node_ids) for x in e[:2]):
        edges.append(e)

# Initialize graph, add nodes and edges, calculate modularity and centrality.
G = nx.Graph()
G.add_nodes_from(list(node_ids))
G.add_weighted_edges_from(edges)
groups = community.best_partition(G)
degree = cn.degree_centrality(G)
betweenness = cn.betweenness_centrality(G, weight='weight')
eigenvector = cn.eigenvector_centrality(G, weight='weight')

# Add node attributes for name, modularity group, and three types of centrality.
# Note: this uses the networkx 1.x argument order (G, name, values);
# networkx >= 2.0 expects (G, values, name).
nx.set_node_attributes(G, 'name', node_dict)
nx.set_node_attributes(G, 'group', groups)
nx.set_node_attributes(G, 'degree', degree)
nx.set_node_attributes(G, 'betweenness', betweenness)
nx.set_node_attributes(G, 'eigenvector', eigenvector)

# Create a json representation of the graph (for d3).
data = json_graph.node_link_data(G)
# You could create the needed json without NetworkX (but you would forfeit
# the network metrics).
# new_data = dict(nodes=[dict(id=n) for n in list(set(nodes))],
#                 links=[dict(source=node_dict[e[0]], target=node_dict[e[1]],
#                             weight=e[2]) for e in edges])