def betweenness_removal(g, recalculate=False):
    """
    Performs robustness analysis based on betweenness centrality on the
    network g, using the sequential (recalculate = True) or simultaneous
    (recalculate = False) approach.  Returns a list with the fraction of
    nodes removed and a list with the corresponding fractal dimension of
    the remaining network.
    """
    # requires: import networkx as nx, import operator, and a fractal-dimension
    # helper module imported as fd
    m = nx.betweenness_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []
    dimension = fd.fractal_dimension(g, iterations=100, debug=False)
    n = len(g.nodes())
    x.append(0)
    y.append(dimension)
    for i in range(1, n - 1):
        # remove the currently most central node
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = nx.betweenness_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
        dimension = fd.fractal_dimension(g, iterations=100, debug=False)
        x.append(i * 1. / n)
        y.append(dimension)
    return x, y
def recalculated_betweenness(ex):
    # Copy the graph so the caller's graph is not modified
    ex = ex.copy()
    # Calculate betweenness of the full graph
    between = nx.betweenness_centrality(ex, weight='distance', normalized=False)
    # Create a real copy to track the recalculated betweenness
    rebetween = dict(between)
    while len(ex.edges()) > 0:
        # Recalculate betweenness
        between = nx.betweenness_centrality(ex, weight='distance', normalized=False)
        # Store recalculated values if they're higher
        for node, value in between.items():
            if value > rebetween[node]:
                rebetween[node] = value
        # Remove all edges from the most central node
        node, value = sorted(between.items(), key=lambda x: x[1], reverse=True)[0]
        if value == 0:
            # All remaining edges are trivial shortest paths
            break
        for tail, head in list(ex.edges(node)):
            ex.remove_edge(tail, head)
        sys.stdout.write('.')
        sys.stdout.flush()
    print()
    return rebetween
def betweenness(infile, recalculate = False):
    """
    Performs robustness analysis based on betweenness centrality,
    on the network specified by infile using sequential (recalculate = True)
    or simultaneous (recalculate = False) approach. Returns a list
    with fraction of nodes removed, a list with the corresponding sizes of
    the largest component of the network, and the overall vulnerability
    of the network.
    """
    g = networkx.read_gml(infile)
    m = networkx.betweenness_centrality(g)
    l = sorted(m.items(), key = operator.itemgetter(1), reverse = True)
    x = []
    y = []
    largest_component = max(networkx.connected_components(g), key = len)
    n = len(g.nodes())
    x.append(0)
    y.append(len(largest_component) * 1. / n)
    R = 0.0
    for i in range(1, n):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.betweenness_centrality(g)
            l = sorted(m.items(), key = operator.itemgetter(1),
                       reverse = True)
        largest_component = max(networkx.connected_components(g), key = len)
        x.append(i * 1. / n)
        R += len(largest_component) * 1. / n
        y.append(len(largest_component) * 1. / n)
    return x, y, 0.5 - R / n
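# A minimal usage sketch of the robustness routine above (not from the original
# source): the GML file name "example.gml" is hypothetical, and the imports
# mirror what the function already assumes.
import operator
import networkx

x, y, vulnerability = betweenness("example.gml", recalculate=False)
print("fraction of nodes removed:", x[:5])
print("relative size of largest component:", y[:5])
print("overall vulnerability (0.5 - R/n):", vulnerability)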
def show_network_metrics(G):
    '''
    Print the local and global metrics of the network
    '''
    print(nx.info(G))

    # density
    print("Density of the network")
    print(nx.density(G))

    # average betweenness (compute the centrality dict once)
    betweenness = nx.betweenness_centrality(G)
    print("Average betweenness of the network")
    print(np.sum(list(betweenness.values())) / len(betweenness))

    # average clustering coefficient
    print("Average clustering coefficient:")
    print(nx.average_clustering(G))

    # create metrics dataframe
    by_node_metrics = pd.DataFrame({
        "Betweenness_Centrality": betweenness,
        "Degree_Centrality": nx.degree_centrality(G),
        "Clustering_Coefficient": nx.clustering(G),
        "Triangles": nx.algorithms.cluster.triangles(G)
    })
    print(by_node_metrics)

    by_node_metrics.to_excel("metrics.xlsx")
def betweenness_fracture(infile, outfile, fraction, recalculate = False):
    """
    Removes given fraction of nodes from infile network in reverse order of
    betweenness centrality (with or without recalculation of centrality values
    after each node removal) and saves the network in outfile.
    """
    g = networkx.read_gml(infile)
    m = networkx.betweenness_centrality(g)
    l = sorted(m.items(), key = operator.itemgetter(1), reverse = True)
    largest_component = max(networkx.connected_components(g), key = len)
    n = len(g.nodes())
    for i in range(1, n):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.betweenness_centrality(g)
            l = sorted(m.items(), key = operator.itemgetter(1),
                       reverse = True)
        largest_component = max(networkx.connected_components(g), key = len)
        if i * 1. / n >= fraction:
            break
    components = networkx.connected_components(g)
    component_id = 1
    for component in components:
        for node in component:
            # g.node was removed in NetworkX 2.4; use g.nodes for attribute access
            g.nodes[node]["component"] = component_id
        component_id += 1
    networkx.write_gml(g, outfile)
def compute_static_graph_statistics(G, start_time, end_time):
    verts = G.vertices
    n = len(verts)
    m = float(end_time - start_time)
    # one dictionary each for degree, closeness and betweenness
    agg_statistics = [dict.fromkeys(verts, 0), dict.fromkeys(verts, 0), dict.fromkeys(verts, 0)]
    avg_statistics = [dict.fromkeys(verts, 0), dict.fromkeys(verts, 0), dict.fromkeys(verts, 0)]

    aggregated_graph = nx.Graph()
    aggregated_graph.add_nodes_from(verts)

    start_time = max(1, start_time)
    for t in range(start_time, end_time + 1):
        aggregated_graph.add_edges_from(G.snapshots[t].edges())
        dc = dict(G.snapshots[t].degree())
        cc = nx.closeness_centrality(G.snapshots[t])
        bc = nx.betweenness_centrality(G.snapshots[t])
        for v in verts:
            avg_statistics[0][v] += dc[v] / (n - 1.0)
            avg_statistics[1][v] += cc[v]
            avg_statistics[2][v] += bc[v]
    for v in verts:
        avg_statistics[0][v] = avg_statistics[0][v] / m
        avg_statistics[1][v] = avg_statistics[1][v] / m
        avg_statistics[2][v] = avg_statistics[2][v] / m

    dc = nx.degree_centrality(aggregated_graph)
    cc = nx.closeness_centrality(aggregated_graph)
    bc = nx.betweenness_centrality(aggregated_graph)
    for v in verts:
        agg_statistics[0][v] = dc[v]
        agg_statistics[1][v] = cc[v]
        agg_statistics[2][v] = bc[v]
    return (agg_statistics, avg_statistics)
def compareGraphs(g1, g2):
    """Compares the quantitative properties of two graphs,
    as a check on the coarse graining."""
    # Nodes and edges
    print('Graph1: #(Nodes, Edges) = (' + str(len(g1.nodes())) + ', ' + str(len(g1.edges())) + ')')
    print('Graph2: #(Nodes, Edges) = (' + str(len(g2.nodes())) + ', ' + str(len(g2.edges())) + ')')

    # Connected components
    # print('\n#CCs for graph 1: ' + str(len(list(nx.connected_components(g1)))))
    # print('#CCs for graph 2: ' + str(len(list(nx.connected_components(g2)))))
    plt.hist([len(i) for i in nx.connected_components(g1)])
    plt.hist([len(i) for i in nx.connected_components(g2)])
    plt.title('Cluster Size')
    plt.xlabel('Cluster Size')
    plt.ylabel('#Cluster')
    plt.show()

    # Degree distribution
    plt.hist(nx.degree_histogram(g1))
    plt.hist(nx.degree_histogram(g2))
    plt.title('Degree Distribution')
    plt.xlabel('Degree')
    plt.ylabel('#Nodes')
    plt.show()

    # Betweenness --- this is by far the most computationally demanding.
    plt.hist(list(nx.betweenness_centrality(g1, normalized=False).values()))
    plt.hist(list(nx.betweenness_centrality(g2, normalized=False).values()))
    plt.title('Distribution of Betweenness')
    plt.xlabel('Betweenness')
    plt.ylabel('#Nodes')
    plt.show()
def sna_calculations(g, play_file): """ :param g: a NetworkX graph object :type g: object :param play_file: the location of a play in .txt format :type play_file: string :return: returns a dictionary containing various network related figures :rtype: dict :note: also writes into results/file_name-snaCalculations.csv and results/allCharacters.csv """ file_name = os.path.splitext(os.path.basename(play_file))[0] sna_calculations_list = dict() sna_calculations_list['playType'] = file_name[0] sna_calculations_list['avDegreeCentrality'] = numpy.mean(numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float)) sna_calculations_list['avDegreeCentralityStd'] = numpy.std( numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float)) sna_calculations_list['avInDegreeCentrality'] = numpy.mean( numpy.fromiter(iter(nx.in_degree_centrality(g).values()), dtype=float)) sna_calculations_list['avOutDegreeCentrality'] = numpy.mean( numpy.fromiter(iter(nx.out_degree_centrality(g).values()), dtype=float)) try: sna_calculations_list['avShortestPathLength'] = nx.average_shortest_path_length(g) except: sna_calculations_list['avShortestPathLength'] = 'not connected' sna_calculations_list['density'] = nx.density(g) sna_calculations_list['avEigenvectorCentrality'] = numpy.mean( numpy.fromiter(iter(nx.eigenvector_centrality(g).values()), dtype=float)) sna_calculations_list['avBetweennessCentrality'] = numpy.mean( numpy.fromiter(iter(nx.betweenness_centrality(g).values()), dtype=float)) sna_calculations_list['DegreeCentrality'] = nx.degree_centrality(g) sna_calculations_list['EigenvectorCentrality'] = nx.eigenvector_centrality(g) sna_calculations_list['BetweennessCentrality'] = nx.betweenness_centrality(g) # sna_calculations.txt file sna_calc_file = csv.writer(open('results/' + file_name + '-snaCalculations.csv', 'wb'), quoting=csv.QUOTE_ALL, delimiter=';') for key, value in sna_calculations_list.items(): sna_calc_file.writerow([key, value]) # all_characters.csv file if not os.path.isfile('results/allCharacters.csv'): with open('results/allCharacters.csv', 'w') as f: f.write( 'Name;PlayType;play_file;DegreeCentrality;EigenvectorCentrality;BetweennessCentrality;speech_amount;AverageUtteranceLength\n') all_characters = open('results/allCharacters.csv', 'a') character_speech_amount = speech_amount(play_file) for character in sna_calculations_list['DegreeCentrality']: all_characters.write(character + ';' + str(sna_calculations_list['playType']) + ';' + file_name + ';' + str( sna_calculations_list['DegreeCentrality'][character]) + ';' + str( sna_calculations_list['EigenvectorCentrality'][character]) + ';' + str( sna_calculations_list['BetweennessCentrality'][character]) + ';' + str( character_speech_amount[0][character]) + ';' + str(character_speech_amount[1][character]) + '\n') all_characters.close() return sna_calculations
def __init__(self, view, controller, use_ego_betw=False, **kwargs):
    super(CacheLessForMore, self).__init__(view, controller)
    topology = view.topology()
    if use_ego_betw:
        # betweenness of each node within its own ego network
        self.betw = dict((v, nx.betweenness_centrality(nx.ego_graph(topology, v))[v])
                         for v in topology.nodes())
    else:
        self.betw = nx.betweenness_centrality(topology)
def weighted_betweenness_centrality_distribution(G, return_dictionary=False):
    """Return a distribution of weighted betweenness centralities.
    If return_dictionary is specified, we return a dictionary indexed by
    vertex name, rather than just the values (as returned by default).
    """
    # the old weighted_edges=True flag was replaced by the weight keyword
    if return_dictionary:
        return nx.betweenness_centrality(G, weight='weight')
    else:
        return nx.betweenness_centrality(G, weight='weight').values()
def betweenness_centrality_distribution(G, return_dictionary=False):
    """Return a distribution of unweighted betweenness centralities,
    as used in Borges, Coppersmith, Meyer, and Priebe 2011.
    If return_dictionary is specified, we return a dictionary indexed by
    vertex name, rather than just the values (as returned by default).
    """
    if return_dictionary:
        return nx.betweenness_centrality(G)
    else:
        return nx.betweenness_centrality(G).values()
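# Minimal sketch (not from the original source) showing how the two
# distribution helpers above might be called on a built-in test graph;
# assumes networkx is imported as nx, as in the helpers themselves.
import networkx as nx

G_demo = nx.karate_club_graph()
values = list(betweenness_centrality_distribution(G_demo))
by_node = betweenness_centrality_distribution(G_demo, return_dictionary=True)
print("max betweenness:", max(values))
print("most central node:", max(by_node, key=by_node.get))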
def centrality_measures(self): centrality_measures = [] txt = '' # betweenness # unweighted self.unweighted_betweenness_distribution = nx.betweenness_centrality(self.G) statistics = self.Stats.get_distribution_info(self.unweighted_betweenness_distribution) centrality_measures.extend(statistics[:5]) centrality_measures.extend(statistics[5]) txt += ',average betweenness centrality (unweighted)' + self.standard_text_distribution # # weighted self.weighted_betweenness_distribution = nx.betweenness_centrality(self.G, weight = self.weight_id) # statistics = self.Stats.get_distribution_info(self.weighted_betweenness_distribution) # centrality_measures.extend(statistics[:5]) # centrality_measures.extend(statistics[5]) # txt += ',average betweenness centrality (weighted)' + self.standard_text_distribution # closeness # unweighted self.unweighted_closeness_distribution = nx.closeness_centrality(self.G) statistics = self.Stats.get_distribution_info(self.unweighted_closeness_distribution) centrality_measures.extend(statistics[:5]) centrality_measures.extend(statistics[5]) txt += ',average closeness centrality (unweighted)' + self.standard_text_distribution # eigen vector # right try: self.right_eigenvector_distribution = nx.eigenvector_centrality(self.G) statistics = self.Stats.get_distribution_info(self.right_eigenvector_distribution) centrality_measures.extend(statistics[:5]) centrality_measures.extend(statistics[5]) except: centrality_measures.extend([0,0,0,0,0]) centrality_measures.extend([0]*len(statistics[5])) txt += ',average right eigenvector' + self.standard_text_distribution # left try: G_rev = self.G.reverse() self.lef_eigenvector_distribution = nx.eigenvector_centrality(G_rev) statistics = self.Stats.get_distribution_info(self.lef_eigenvector_distribution) centrality_measures.extend(statistics[:5]) centrality_measures.extend(statistics[5]) except: centrality_measures.extend([0,0,0,0,0]) centrality_measures.extend([0]*len(statistics[5])) txt += ',average left eigenvector' + self.standard_text_distribution return [centrality_measures, txt]
def genSeedsMaxDegree(self, p, bwness):
    """Generate seeds based on maximum degree. Also handles betweenness.
    Optional input argument sets randomization. 0<p<1"""
    numSeeds = self.numSeeds
    if bwness:
        numSeeds = numSeeds * 1.5
        # use sampled betweenness on large graphs to keep the runtime down
        k_val = int(2000 / math.sqrt(len(self.adj)))
        if k_val > len(self.adj):
            bw_node = nx.betweenness_centrality(self.nxgraph)
        else:
            bw_node = nx.betweenness_centrality(self.nxgraph, k=k_val)
    numMax = int(self.numSeeds / (1.0 * p))
    seeds = [None] * numMax
    deg = [0] * numMax
    for key, value in self.adj.items():
        # fill seeds, keeping the list sorted by degree
        curr_deg = len(value)
        for j in range(numMax):
            if curr_deg > deg[j]:
                deg.insert(j, curr_deg)
                seeds.insert(j, key)
                break
    seeds = seeds[:numMax]
    deg = deg[:numMax]
    if bwness:
        numMax = int(self.numSeeds / (1.0 * p))
        dict_bw = bw_node
        seeds_degree = seeds
        seeds = dict()
        for node in seeds_degree:
            seeds[node] = dict_bw.get(node)
        # keep the numMax highest-betweenness candidates, in order
        seeds_fin = sorted(seeds.items(), key=operator.itemgetter(1), reverse=True)[:numMax]
        seeds = [node for node, _ in seeds_fin]
    # shuffle
    if p != 1:
        random.shuffle(seeds)
    return seeds[:self.numSeeds]
def node_graph(tup):
    h = nx.Graph()
    h.add_edges_from(tup)
    print("edges:", h.edges())
    # %matplotlib inline
    BLUE = "#99CCFF"
    nx.draw(h, node_color=BLUE, with_labels=True)
    print("Degree Distribution:", h.degree())
    print("Degree Centrality:", nx.degree_centrality(h))
    print("Betweenness Centrality:", nx.betweenness_centrality(h))
    print("Betweenness Centrality Non-Normalized:", nx.betweenness_centrality(h, normalized=False))
    print("Closeness Centrality:", nx.closeness_centrality(h))
    pyplot.show()
def betweenness_apl(g, recalculate=False):
    """
    Performs robustness analysis based on betweenness centrality on the
    network g, using the sequential (recalculate = True) or simultaneous
    (recalculate = False) approach.  Returns a list with the fraction of
    nodes removed, a list with the corresponding average shortest path
    lengths, and the accumulated path length normalized by its initial value.
    """
    m = networkx.betweenness_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []
    average_path_length = 0.0
    number_of_components = 0
    n = len(g.nodes())

    # connected_component_subgraphs() was removed in NetworkX 2.4
    for c in networkx.connected_components(g):
        sg = g.subgraph(c)
        average_path_length += networkx.average_shortest_path_length(sg)
        number_of_components += 1
    average_path_length = average_path_length / number_of_components
    initial_apl = average_path_length

    x.append(0)
    y.append(average_path_length * 1. / initial_apl)
    r = 0.0
    for i in range(1, n):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.betweenness_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1),
                       reverse=True)
        average_path_length = 0.0
        number_of_components = 0
        for c in networkx.connected_components(g):
            sg = g.subgraph(c)
            if len(sg.nodes()) > 1:
                average_path_length += networkx.average_shortest_path_length(sg)
                number_of_components += 1
        average_path_length = average_path_length / number_of_components
        x.append(i * 1. / n)
        r += average_path_length
        y.append(average_path_length)
    return x, y, r / initial_apl
def betweenness_centrality(self, withme=False, node=None, average=False):
    if node is None:
        # centrality for every node, keyed both by name and by id
        if withme:
            my_dict = nx.betweenness_centrality(self.mynet)
        else:
            my_dict = nx.betweenness_centrality(self.no_ego_net)
        new = {}
        new2 = {}
        for i in my_dict:
            new[self.id_to_name(i)] = my_dict[i]
            new2[i] = my_dict[i]
        if average:
            print("The average is " + str(round(sum(new.values()) / float(len(new.values())), 4)))
        else:
            for i, j in new.items():
                print(i, round(j, 4))
            return new2
    else:
        if withme:
            my_dict = nx.betweenness_centrality(self.mynet)
            try:
                print("The coefficient for node " + str(node) + " is " + str(round(my_dict[node], 4)))
            except Exception:
                try:
                    return my_dict[self.name_to_id(node)]
                except Exception:
                    print("Invalid node name")
        else:
            my_dict = nx.betweenness_centrality(self.no_ego_net)
            try:
                print("The coefficient for node " + str(node) + " is " + str(round(my_dict[node], 4)))
            except Exception:
                try:
                    print("The coefficient for node " + str(node) + " is " + str(round(my_dict[self.name_to_id(node)], 4)))
                except Exception:
                    print("Invalid node name")
def betweenValue(charList, graphFile, bookNetworksPath): # Compute betweenness for all characters in the current chapter graph. g = nx.read_gexf(graphFile) betCentrality = nx.betweenness_centrality(g, k=None, normalized=True, weight="Weight", endpoints=False, seed=None) betweenValues = betCentrality.values() # NORMALISE betweenness values d = decimal.Decimal maxBetween = max(betweenValues) minBetween = min(betweenValues) maxMinusMin = d(maxBetween) - d(minBetween) if not charList: # Get top 10 overall characters from overall.gexf graph overallGraphFile = bookNetworksPath + "overall.gexf" overall_g = nx.read_gexf(overallGraphFile) overallBetweenCent = nx.betweenness_centrality( overall_g, k=None, normalized=True, weight="Weight", endpoints=False, seed=None ) # Quick fix for getting all characters. # sortedCentrality = dict(sorted(overallBetweenCent.iteritems(), key=itemgetter(1), reverse=True)[:10]) sortedCentrality = dict(sorted(overallBetweenCent.iteritems(), key=itemgetter(1), reverse=True)) sortedCentrality = sorted(sortedCentrality.iteritems(), key=itemgetter(1), reverse=True) charList = [seq[0] for seq in sortedCentrality] return charList else: charList = [item for item in charList] for index, item in enumerate(charList): currentChar = None for key, value in betCentrality.iteritems(): if key == item: nummerator = d(value) - d(minBetween) if nummerator == 0: charList[index] = (key, str(0)) else: norm_value = (d(value) - d(minBetween)) / d(maxMinusMin) charList[index] = (key, str(norm_value)) currentChar = key # If current character is not present in the current chapter assign 0 influence. if not currentChar: charList[index] = (item, 0) return charList
def build_graph():
    pair_list = TwitterUser.get_top_100_pair()
    DG = nx.DiGraph()
    DG.add_edges_from([(foer, twitter_user) for twitter_user, foer in pair_list])

    betweenness = nx.betweenness_centrality(DG)
    closeness = nx.closeness_centrality(DG)
    edge_betweenness = nx.edge_betweenness_centrality(DG)
    clustering_co = nx.clustering(nx.Graph(DG))
    page_rank = nx.pagerank(DG)

    for twitter_id in DG.nodes():
        t = TwitterUser.get_by_id(twitter_id)
        node = DG.nodes[twitter_id]
        node['user_id'] = t.user_id
        node['label'] = t.scrn_name
        node['follower_count'] = t.foer_cnt
        node['friend_count'] = t.friend_cnt
        node['status_count'] = t.status_cnt
        node['location'] = t.location
        node['verified'] = t.verified
        node['twitter_age'] = (date.today() - t.created_at).days
        node['daily_tweet'] = t.status_cnt * 1.0 / node['twitter_age']
        node['indegree'] = len([(id, foer) for id, foer in pair_list
                                if id == twitter_id])
        node['outdegree'] = len([(id, foer) for id, foer in pair_list
                                 if foer == twitter_id])
        node['cluster'] = clustering_co[twitter_id]
        node['betweenness'] = betweenness[twitter_id]
        node['closeness'] = closeness[twitter_id]
        node['page_rank'] = page_rank[twitter_id]

    for out_n, in_n in DG.edges():
        DG[out_n][in_n]['edge_betweenness'] = edge_betweenness[(out_n, in_n)]
    return DG
def __init__(self, graph, node_1=None, node_2=None):
    self.graph = graph
    self.node_1 = node_1
    self.node_2 = node_2
    self.clustering_dict = nx.clustering(graph)
    self.betweenness_dict = nx.betweenness_centrality(graph)
    self.average_neighbor_degree_dict = nx.average_neighbor_degree(graph)

    self.attributes_map = {
        "adamic_adar_similarity": self.adamic_adar_similarity,
        "average_clustering_coefficient": self.average_clustering_coefficient,
        "average_neighbor_degree_sum": self.average_neighbor_degree_sum,
        "betweenness_centrality": self.betweenness_centrality,
        "closeness_centrality_sum": self.closeness_centrality_sum,
        "clustering_coefficient_sum": self.clustering_coefficient_sum,
        "common_neighbors": self.common_neighbors,
        "cosine": self.cosine,
        "jaccard_coefficient": self.jaccard_coefficient,
        "katz_measure": self.katz_measure,
        "preferential_attachment": self.preferential_attachment,
        "square_clustering_coefficient_sum": self.square_clustering_coefficient_sum,
        "sum_of_neighbors": self.sum_of_neighbors,
        "sum_of_papers": self.sum_of_papers,
        "get_shortest_path_length": self.get_shortest_path_length,
        "get_second_shortest_path_length": self.get_second_shortest_path_length
    }

    if self.node_1 is not None and self.node_2 is not None:
        self.neighbors_1 = self.all_neighbors(self.node_1)
        self.neighbors_2 = self.all_neighbors(self.node_2)
def __nfur_func(topology, edges, betweenness):
    """
    Calculate NFUR on a specific set of edges

    Parameters
    ----------
    topology : Topology
        The topology
    edges : list
        The list of edges (subset of topology edges)
    betweenness : dict
        The betweenness centrality of the topology, keyed by node

    Returns
    -------
    nfur : dict
        NFUR values keyed by node, only relative to failures of the
        specified edges
    """
    nfur = betweenness.copy()
    topology = topology.copy()
    for u, v in edges:
        # remember the edge attributes so the edge can be restored afterwards
        edge_attr = topology[u][v]
        topology.remove_edge(u, v)
        betw = nx.betweenness_centrality(topology, normalized=False,
                                         weight='weight')
        for node in betw.keys():
            if betw[node] > nfur[node]:
                nfur[node] = betw[node]
        topology.add_edge(u, v, **edge_attr)
    return nfur
def test_fast_versions_properties_threshold_graphs(self): cs='ddiiddid' G=nxt.threshold_graph(cs) assert_equal(nxt.density('ddiiddid'), nx.density(G)) assert_equal(sorted(nxt.degree_sequence(cs)), sorted(G.degree().values())) ts=nxt.triangle_sequence(cs) assert_equal(ts, list(nx.triangles(G).values())) assert_equal(sum(ts) // 3, nxt.triangles(cs)) c1=nxt.cluster_sequence(cs) c2=list(nx.clustering(G).values()) assert_almost_equal(sum([abs(c-d) for c,d in zip(c1,c2)]), 0) b1=nx.betweenness_centrality(G).values() b2=nxt.betweenness_sequence(cs) assert_true(sum([abs(c-d) for c,d in zip(b1,b2)]) < 1e-14) assert_equal(nxt.eigenvalues(cs), [0, 1, 3, 3, 5, 7, 7, 8]) # Degree Correlation assert_true(abs(nxt.degree_correlation(cs)+0.593038821954) < 1e-12) assert_equal(nxt.degree_correlation('diiiddi'), -0.8) assert_equal(nxt.degree_correlation('did'), -1.0) assert_equal(nxt.degree_correlation('ddd'), 1.0) assert_equal(nxt.eigenvalues('dddiii'), [0, 0, 0, 0, 3, 3]) assert_equal(nxt.eigenvalues('dddiiid'), [0, 1, 1, 1, 4, 4, 7])
def relevant_stats(G):
    cloC = nx.closeness_centrality(G, distance='distance')
    betC = nx.betweenness_centrality(G, weight='distance')
    katC = nx.katz_centrality(G)
    eigC = nx.eigenvector_centrality(G)
    # return the computed centrality dictionaries instead of discarding them
    return cloC, betC, katC, eigC
def __init__(self, n=1000, k=10, p=0.02947368):
    self.n = n
    self.k = k
    self.p = p
    # NetworkX >= 2.x expects an int or random.Random instance as seed
    self.ws = nx.watts_strogatz_graph(self.n, self.k, self.p,
                                      seed=random.Random('nsll'))
    # NetworkX >= 2.x argument order: (graph, values, name)
    nx.set_node_attributes(self.ws, 'S', 'SIR')
    self.clustering = nx.clustering(self.ws)
    self.betweenness = nx.betweenness_centrality(self.ws)

    p_r_0 = 0.001
    r_0 = int(self.n * p_r_0)
    if r_0 < 1:
        r_0 = 1
    random.seed('nsll')
    self.r = random.sample(list(self.ws.nodes()), r_0)

    i_0 = 4
    if i_0 < r_0:
        i_0 += 1
    random.seed('nsll')
    self.infected = random.sample(list(self.ws.nodes()), i_0)
    for n in self.infected:
        self.ws.nodes[n]['SIR'] = 'I'
    for n in self.r:
        self.ws.nodes[n]['SIR'] = 'R'
    self.s = self.n - len(self.infected) - len(self.r)
    print(self.r)
    print(self.infected)
def print_top_betweenness(component, size=10):
    bc = nx.betweenness_centrality(component, weight='weight', normalized=True)
    for node in sorted(bc, key=bc.get, reverse=True)[0:size]:
        query = {'spec': {'user.id': int(node)},
                 'fields': {'_id': 0, 'user.screen_name': 1}}
        this_data = bf.query_mongo_get_list(query, limit=1)
        print(this_data['user']['screen_name'], '&', "{0:.4f}".format(bc[node]), '\\\\')
    return bc
def plot_betweenness_dist(graph, path):
    """Plot the distribution of betweenness centrality of the graph and save
    the figure at the given path. On the X-axis we have betweenness centrality
    values and on the Y-axis the percentage of nodes that have that
    betweenness value."""
    N = float(graph.order())
    node_to_betweenness = nx.betweenness_centrality(graph)
    betweenness_to_percent = {}

    # calculate percentages of nodes with certain betweenness value
    for node in node_to_betweenness:
        betweenness_to_percent[node_to_betweenness[node]] = 1 + \
            betweenness_to_percent.get(node_to_betweenness[node], 0)
    for c in betweenness_to_percent:
        betweenness_to_percent[c] = betweenness_to_percent[c] / N * 100

    x = sorted(betweenness_to_percent.keys(), reverse=True)
    y = [betweenness_to_percent[i] for i in x]

    plt.loglog(x, y, 'b-', marker='.')
    plt.title("Betweenness Centrality Distribution")
    plt.ylabel("Percentage")
    plt.xlabel("Betweenness value")
    plt.axis('tight')
    plt.savefig(path)
def test_florentine_families_graph(self):
    """Weighted betweenness centrality: Florentine families graph"""
    G = nx.florentine_families_graph()
    b_answer = {'Acciaiuoli': 0.000,
                'Albizzi': 0.212,
                'Barbadori': 0.093,
                'Bischeri': 0.104,
                'Castellani': 0.055,
                'Ginori': 0.000,
                'Guadagni': 0.255,
                'Lamberteschi': 0.000,
                'Medici': 0.522,
                'Pazzi': 0.000,
                'Peruzzi': 0.022,
                'Ridolfi': 0.114,
                'Salviati': 0.143,
                'Strozzi': 0.103,
                'Tornabuoni': 0.092}
    b = nx.betweenness_centrality(G, weight='weight', normalized=True)
    for n in sorted(G):
        assert_almost_equal(b[n], b_answer[n], places=3)
def return_average_betweenness_centralities(path):
    f = open(path, 'r')
    dct = json.loads(f.read())
    f.close()
    ct_avg = -1
    whole_avg = -1
    try:
        dg = json_dag.JsonToDag(path)
        dg.add_nodes()
        dg.add_dependencies()
        G = dg.G
        critical_path, stats_result = dg.findCriticalPath()
        G_undirected = G.to_undirected()
        bt = nx.betweenness_centrality(G)
        # bt_edge = nx.edge_betweenness_centrality(G)
        sm = 0
        for element in critical_path:
            sm += bt[element]
        ct_avg = sm / float(len(critical_path))
        whole_avg = sum(bt.values()) / float(len(bt))
    except Exception:
        print("error")
    return ct_avg, whole_avg
def calculate_betweenness(graph):
    '''Calculate betweenness centrality of a node, sets value on node as
    attribute; returns graph, and dict of the betweenness centrality values
    '''
    g = graph
    bc = nx.betweenness_centrality(g)
    # NetworkX >= 2.x argument order: (graph, values, name)
    nx.set_node_attributes(g, bc, 'betweenness')
    return g, bc
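# Minimal sketch (not from the original source) of calculate_betweenness():
# with the NetworkX >= 2.x argument order used above, the value is readable
# both from the returned dict and from the node attribute.
import networkx as nx

g_demo, bc_demo = calculate_betweenness(nx.karate_club_graph())
print(bc_demo[0], g_demo.nodes[0]['betweenness'])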
def btw_centrality_month_airports(data): df = data.copy() df['DateOfDeparture'] = pd.to_datetime(df['DateOfDeparture']) df['month'] = df['DateOfDeparture'].dt.week.astype(str) df['year'] = df['DateOfDeparture'].dt.year.astype(str) df['year_month'] = df[['month','year']].apply(lambda x: '-'.join(x),axis=1) df['year_month_dep'] = df[['Departure','month','year']].apply(lambda x: '-'.join(x),axis=1) df['year_month_arr'] = df[['Arrival','month','year']].apply(lambda x: '-'.join(x),axis=1) year_month = pd.unique(df['year_month']) G = nx.Graph() btw_centrality = {} for i, item in enumerate(year_month): sub_df = df[df['year_month'] == item][['Departure','Arrival']] list_dep_arr = zip(sub_df['Departure'], sub_df['Arrival']) G.add_edges_from(list_dep_arr) #G.number_of_nodes() #G.number_of_edges() centrality_month = nx.betweenness_centrality(G) centrality_month = pd.DataFrame(centrality_month.items()) centrality_month['year_month'] = [item] * centrality_month.shape[0] centrality_month['airport_year_month'] = centrality_month[centrality_month.columns[[0,2]]].apply(lambda x: '-'.join(x),axis=1) centrality_month =dict(zip(centrality_month['airport_year_month'], centrality_month[1])) z = btw_centrality.copy() z.update(centrality_month) btw_centrality = z df['btw_centrality_month_dep'] = df['year_month_dep'].map(btw_centrality) df['btw_centrality_month_arr'] = df['year_month_arr'].map(btw_centrality) return df
def get_center_ego(graph):
    bt = nx.betweenness_centrality(graph)
    print(bt)
    for (node, betweenness) in sorted(bt.items(), key=lambda x: x[1], reverse=True):
        nodes = nx.ego_graph(graph, node).nodes()
        print(nodes)
        return nodes
if __name__ == '__main__':
    # Load the networkx.csv file that holds the co-occurrence counts of word pairs.
    dataset = pd.read_csv('D:\\crawling\\networkx.csv')

    # Build a graph for computing the centrality measures.
    G_centrality = nx.Graph()

    # Add an edge only for word pairs whose frequency is at least 19700.
    for ind in range((len(np.where(dataset['freq'] >= 19700)[0]))):
        G_centrality.add_edge(dataset['word1'][ind], dataset['word2'][ind],
                              weight=int(dataset['freq'][ind]))

    dgr = nx.degree_centrality(G_centrality)        # degree centrality
    btw = nx.betweenness_centrality(G_centrality)   # betweenness centrality
    cls = nx.closeness_centrality(G_centrality)     # closeness centrality
    egv = nx.eigenvector_centrality(G_centrality)   # eigenvector centrality
    pgr = nx.pagerank(G_centrality)                 # PageRank

    # Sort each measure in descending order of centrality.
    sorted_dgr = sorted(dgr.items(), key=operator.itemgetter(1), reverse=True)
    sorted_btw = sorted(btw.items(), key=operator.itemgetter(1), reverse=True)
    sorted_cls = sorted(cls.items(), key=operator.itemgetter(1), reverse=True)
    sorted_egv = sorted(egv.items(), key=operator.itemgetter(1), reverse=True)
    sorted_pgr = sorted(pgr.items(), key=operator.itemgetter(1), reverse=True)

    # Graph used to draw the word network.
    G = nx.Graph()

    # PageRank determines the strength of association between two nodes (word pairs).
# nx.write_gexf(subgraph, 'beehive-sub.gexf')

triadic_closure = nx.transitivity(G)
print('Triadic closure:', triadic_closure)

degree_dict = dict(G.degree(G.nodes()))
nx.set_node_attributes(G, degree_dict, 'degree')
print(G.nodes['poverty'])

sorted_degree = sorted(degree_dict.items(), key=itemgetter(1), reverse=True)
print('Top 20 nodes by degree:')
for d in sorted_degree[:20]:
    print(d)

betweenness_dict = nx.betweenness_centrality(G)
eigenvector_dict = nx.eigenvector_centrality(G)
nx.set_node_attributes(G, betweenness_dict, 'betweenness')
nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')

sorted_betweenness = sorted(betweenness_dict.items(), key=itemgetter(1), reverse=True)
print('Top 20 nodes by betweenness centrality:')
for b in sorted_betweenness[:20]:
    print(b)

sorted_eigenvector = sorted(eigenvector_dict.items(), key=itemgetter(1), reverse=True)
def betweenness_centrality(G, nodes): r"""Compute betweenness centrality for nodes in a bipartite network. Betweenness centrality of a node `v` is the sum of the fraction of all-pairs shortest paths that pass through `v`. Values of betweenness are normalized by the maximum possible value which for bipartite graphs is limited by the relative size of the two node sets [1]_. Let `n` be the number of nodes in the node set `U` and `m` be the number of nodes in the node set `V`, then nodes in `U` are normalized by dividing by .. math:: \frac{1}{2} [m^2 (s + 1)^2 + m (s + 1)(2t - s - 1) - t (2s - t + 3)] , where .. math:: s = (n - 1) \div m , t = (n - 1) \mod m , and nodes in `V` are normalized by dividing by .. math:: \frac{1}{2} [n^2 (p + 1)^2 + n (p + 1)(2r - p - 1) - r (2p - r + 3)] , where, .. math:: p = (m - 1) \div n , r = (m - 1) \mod n . Parameters ---------- G : graph A bipartite graph nodes : list or container Container with all nodes in one bipartite node set. Returns ------- betweenness : dictionary Dictionary keyed by node with bipartite betweenness centrality as the value. See Also -------- degree_centrality, closeness_centrality, networkx.algorithms.bipartite.basic.sets, networkx.algorithms.bipartite.basic.is_bipartite Notes ----- The nodes input parameter must contain all nodes in one bipartite node set, but the dictionary returned contains all nodes from both node sets. References ---------- .. [1] Borgatti, S.P. and Halgin, D. In press. "Analyzing Affiliation Networks". In Carrington, P. and Scott, J. (eds) The Sage Handbook of Social Network Analysis. Sage Publications. http://www.steveborgatti.com/papers/bhaffiliations.pdf """ top = set(nodes) bottom = set(G) - top n = float(len(top)) m = float(len(bottom)) s = (n - 1) // m t = (n - 1) % m bet_max_top = (((m**2) * ((s + 1)**2)) + (m * (s + 1) * (2 * t - s - 1)) - (t * ((2 * s) - t + 3))) / 2.0 p = (m - 1) // n r = (m - 1) % n bet_max_bot = (((n**2) * ((p + 1)**2)) + (n * (p + 1) * (2 * r - p - 1)) - (r * ((2 * p) - r + 3))) / 2.0 betweenness = nx.betweenness_centrality(G, normalized=False, weight=None) for node in top: betweenness[node] /= bet_max_top for node in bottom: betweenness[node] /= bet_max_bot return betweenness
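# Minimal sketch (not from the original source) of the bipartite normalization
# documented above, using networkx's bipartite module on a toy affiliation
# graph; the node sets are chosen purely for illustration.
import networkx as nx
from networkx.algorithms import bipartite

B = nx.Graph()
B.add_edges_from([(0, 'a'), (0, 'b'), (1, 'b'), (2, 'b'), (2, 'c')])
top_nodes = {0, 1, 2}  # one of the two bipartite node sets
print(bipartite.betweenness_centrality(B, top_nodes))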
                   node_size=node_size,
                   node_color=node_color,
                   alpha=0.7,
                   with_labels=True,
                   width=edge_width,
                   edge_color='.4',
                   cmap=plt.cm.Blues)
plt.savefig("DevelopersWeightedCircular.png")

# In[14]:

# colored
import networkx as nx

G_fb = nx.read_edgelist('partnerDevelopers.txt', create_using=nx.Graph(), nodetype=str)
pos = nx.spring_layout(G_fb)
betCent = nx.betweenness_centrality(G_fb, normalized=True, endpoints=True)
node_color = [100 * G_fb.degree(v) for v in G_fb]
node_size = [v * 10000 for v in betCent.values()]
plt.figure(figsize=(20, 20))
nx.draw_networkx(G_fb, pos=pos, with_labels=True,
                 node_color=node_color,
                 node_size=node_size)
plt.savefig("DevelopersColored.png")

# In[ ]:
def Decomposition(value, timepass, q): global masti, n, G, mymap a = 0 if (masti[value] == -1 or G.has_node(value)): mymap[value] = 0 for nbr in G[value]: #Not necessary but keeping it mymap[nbr] = mymap[nbr] - 1 G.remove_node(value) visited = defaultdict(lambda: 0) for i in range(1, n + 1): visited[i] = False for i in range(1, n + 1): if (mymap[i] == 0 and masti[i] == -1): masti[i] = value store = defaultdict(lambda: 0) if ((visited[i] == False) and timepass[i] == value and masti[i] == -1 and mymap[i] > 0): DFSUtil(i, visited, timepass, value) baby = 0 for j in range(1, n + 1): store[j] = -1 for h in range(1, n + 1): if (timepass[h] == -10): store[h] = 1 baby = baby + 1 baby1 = h if (baby == 1): masti[baby1] = value break gr = nx.Graph() for g in range(1, n + 1): if (store[g] == 1): for f in range(g + 1, n + 1): if (store[f] == 1): for d in G[g]: if (d == f): gr.add_node(g) gr.add_node(f) gr.add_edge(g, f) gr.add_edge(f, g) if (gr.number_of_edges() < 4 * gr.number_of_nodes() and gr.number_of_nodes() < 800): pr = nx.betweenness_centrality(gr) elif (gr.number_of_nodes() < 2000 and 4 * gr.number_of_nodes() > gr.number_of_edges()): pr = nx.betweenness_centrality( gr, k=max(1, gr.number_of_nodes() // 8)) elif (gr.number_of_nodes() < 5000 and 10 * gr.number_of_nodes() > gr.number_of_edges()): pr = nx.betweenness_centrality( gr, k=max(1, gr.number_of_nodes() // 32)) elif (gr.number_of_nodes() < 20000 and 10 * gr.number_of_nodes() > gr.number_of_edges()): pr = nx.betweenness_centrality( gr, k=max(1, gr.number_of_nodes() // 2000)) elif (gr.number_of_nodes() < 50000 and 10 * gr.number_of_nodes() > gr.number_of_edges()): pr = nx.betweenness_centrality( gr, k=max(1, gr.number_of_nodes() // 20000)) elif (gr.number_of_nodes() < 200000 and 1.5 * gr.number_of_nodes() > gr.number_of_edges()): pr = nx.betweenness_centrality( gr, k=max(1, gr.number_of_nodes() // 80000)) elif (gr.number_of_nodes() < 600000 and 1.2 * gr.number_of_nodes() > gr.number_of_edges()): pr = nx.betweenness_centrality( gr, k=max(1, gr.number_of_nodes() // 128000)) elif (gr.number_of_nodes() < 1200000 and 1.1 * gr.number_of_nodes() > gr.number_of_edges()): pr = nx.betweenness_centrality( gr, k=max(1, gr.number_of_nodes() // 320000)) else: pr = nx.betweenness_centrality( gr, k=max(1, gr.number_of_nodes() // 400000)) nextNode = max(pr, key=pr.get) for m in range(1, n + 1): if (timepass[m] == -10): timepass[m] = nextNode masti[nextNode] = value gr.clear() store.clear() if (mymap[nextNode] > 0): q.append(nextNode) visited.clear() while (q): if (value == 0): break aese = q[0] q.popleft() Decomposition(aese, timepass, q)
def centralityAnalysis(repo: git.Repo, commits: List[git.Commit], outputDir: str): allRelatedAuthors = {} authorCommits = Counter({}) # for all commits... print("Analyzing centrality") for commit in Bar('Processing').iter(commits): author = commit.author.email # increase author commit count authorCommits.update({author: 1}) # initialize dates for related author analysis commitDate = datetime.fromtimestamp(commit.committed_date) earliestDate = commitDate + relativedelta(months=-1) latestDate = commitDate + relativedelta(months=+1) # find authors related to this commit # commitRelatedCommits = commit.iter_items( # repo, 'master', # after=earliestDate.strftime('%Y-%m-%d'), # before=latestDate.strftime('%Y-%m-%d')) commitRelatedCommits = filter( lambda c: findRelatedCommits(author, earliestDate, latestDate, c), commits) commitRelatedAuthors = set( list(map(lambda c: c.author.email, commitRelatedCommits))) # get current related authors collection and update it authorRelatedAuthors = allRelatedAuthors.setdefault(author, set()) authorRelatedAuthors.update(commitRelatedAuthors) # prepare graph print("Preparing NX graph") G = nx.Graph() for author in allRelatedAuthors: for relatedAuthor in allRelatedAuthors[author]: G.add_edge(author.strip(), relatedAuthor.strip()) # analyze graph closeness = dict(nx.closeness_centrality(G)) betweenness = dict(nx.betweenness_centrality(G)) centrality = dict(nx.degree_centrality(G)) density = nx.density(G) modularity = list(greedy_modularity_communities(G)) print("Outputting CSVs") # output non-tabular results with open(os.path.join(outputDir, 'project.csv'), 'a', newline='') as f: w = csv.writer(f, delimiter=',') w.writerow(['Density', density]) w.writerow(['Community Count', len(modularity)]) # output community information with open(os.path.join(outputDir, 'community.csv'), 'a', newline='') as f: w = csv.writer(f, delimiter=',') w.writerow(['Community Index', 'Author Count', 'Commit Count']) for idx, community in enumerate(modularity): communityCommitCount = sum(authorCommits[author] for author in community) w.writerow([idx + 1, len(modularity[idx]), communityCommitCount]) # combine centrality results combined = {} for key in closeness: single = { 'Author': key, 'Closeness': closeness[key], 'Betweenness': betweenness[key], 'Centrality': centrality[key] } combined[key] = single # output tabular results with open(os.path.join(outputDir, 'centrality.csv'), 'w', newline='') as f: w = csv.DictWriter( f, ['Author', 'Closeness', 'Betweenness', 'Centrality']) w.writeheader() for key in combined: w.writerow(combined[key]) # output graph to PNG print("Outputting graph to PNG") graphFigure = plt.figure(5, figsize=(30, 30)) nx.draw(G, with_labels=True, node_color='orange', node_size=4000, edge_color='black', linewidths=2, font_size=20) graphFigure.savefig(os.path.join(outputDir, 'graph.png'))
def main(simulated_time): random.seed(RANDOM_SEED) np.random.seed(RANDOM_SEED) """ TOPOLOGY from a json """ t = Topology() t.G = nx.read_graphml("Euclidean.graphml") ls = list(t.G.nodes) li = {x: int(x) for x in ls} nx.relabel_nodes(t.G, li, False) #Transform str-labels to int-labels print "Nodes: %i" % len(t.G.nodes()) print "Edges: %i" % len(t.G.edges()) #MANDATORY fields of a link # Default values = {"BW": 1, "PR": 1} valuesOne = dict(itertools.izip(t.G.edges(), np.ones(len(t.G.edges())))) nx.set_edge_attributes(t.G, name='BW', values=valuesOne) nx.set_edge_attributes(t.G, name='PR', values=valuesOne) centrality = nx.betweenness_centrality(t.G) nx.set_node_attributes(t.G, name="centrality", values=centrality) sorted_clustMeasure = sorted(centrality.items(), key=operator.itemgetter(1), reverse=True) top20_devices = sorted_clustMeasure[0:20] main_fog_device = copy.copy(top20_devices[0][0]) # df = pd.read_csv("pos_network.csv") # pos = {} # for r in df.iterrows(): # lat = r[1].x # lng = r[1].y # pos[r[0]] = (lat, lng) # fig = plt.figure(figsize=(10, 8), dpi=100) # nx.draw(t.G, with_labels=True,pos=pos,node_size=60,node_color="orange", font_size=8) # plt.savefig('labels.png') # exit() print "-" * 20 print "Best top centralized device: ", main_fog_device print "-" * 20 """ APPLICATION """ app1 = create_application("app1") """ PLACEMENT algorithm """ #There are not modules to place. placement = NoPlacementOfModules("NoPlacement") """ POPULATION algorithm """ number_generators = int(len(t.G) * 0.1) print "Number of generators %i" % number_generators #you can use whatever funciton to change the topology dStart = deterministicDistributionStartPoint(500, 400, name="Deterministic") pop = Population_Move(name="mttf-nodes", srcs=number_generators, node_dst=main_fog_device, activation_dist=dStart) pop.set_sink_control({ "id": main_fog_device, "number": number_generators, "module": app1.get_sink_modules() }) dDistribution = deterministicDistribution(name="Deterministic", time=100) pop.set_src_control({ "number": 1, "message": app1.get_message("M.Action"), "distribution": dDistribution }) #In addition, a source includes a distribution function: """-- SELECTOR algorithm """ selectorPath = CloudPath_RR() """ SIMULATION ENGINE """ s = Sim(t, default_results_path="Results_%s" % (simulated_time)) s.deploy_app(app1, placement, pop, selectorPath) s.run(simulated_time, test_initial_deploy=False, show_progress_monitor=False) # s.draw_allocated_topology() # for debugging s.print_debug_assignaments()
import networkx as nx
import json

G = nx.Graph()
fp = open("Cit-HepPh - Copy.txt", 'r')
fp.readline()
fp.readline()
fp.readline()
fp.readline()
while True:
    line = fp.readline()
    if not line:
        break
    tk = line.split('\t')
    G.add_edge(int(tk[0]), int(tk[1]))

# dg = nx.degree_centrality(G)
# cc = nx.closeness_centrality(G, normalized=True)
bc = nx.betweenness_centrality(G, k=None, normalized=True, weight=None,
                               endpoints=False, seed=None)
# ec = nx.edge_betweenness_centrality(G, normalized=True, weight=None)
# eg = nx.eigenvector_centrality_numpy(G)

# json.dump(dg, open("degree_centrality.txt", 'w'))
# json.dump(cc, open("closeness.txt", 'w'))
json.dump(bc, open("betweeness.txt", 'w'))
# json.dump(ec, open("edge_betweeness.txt", 'w'))
# json.dump(eg, open("eigenvector.txt", 'w'))
fp.close()
def get_centralities(compare): params = [5000, 2000, 1000, 500, 100, 50, 40, 30, 20, 10, 5, 4, 3, 2, 1, 0] #[300000, 150000, 100000, 50000, 35000, 20000, 14000, 10000, 5000, 2000, 1000, 500, 100, 50, 30, 20, 10, 5, 1] folderout = 'networks/backboning_centralities/' if not os.path.exists(folderout): os.makedirs(folderout) time_nx = [] time_ig = [] ftimes = open(folderout + 'compare_comp_time.dat', 'w') ftimes.write('nc\tt_nx\tt_ig\n') for nc in params: ''' NETWORKX ''' edges_nx = [] t1 = time.time() print 'Parse edges' for ind, line in enumerate( open('networks/backboning/nc_backboned_' + str(nc))): if 'nij' not in line: e1, e2, w, sign = line.strip().split('\t') edges_nx.append((e1, e2, {'weight': float(w)})) G_nx = nx.Graph() G_nx.add_edges_from(edges_nx) GC_nx = [ c for c in sorted( nx.connected_components(G_nx), key=len, reverse=True) ][0] print nc, '\tGet NC degrees' degrees_nx = add_df_meas(nx.degree_centrality(G_nx), 'degree_nx') print nc, '\tGet NC clustering' clusterings_nx = add_df_meas(nx.clustering(G_nx), 'clustering_nx') print nc, '\tGet NC pageranks' pageranks_nx = add_df_meas(nx.pagerank(G_nx), 'pagerank_nx') print nc, '\tGet NC betweenness' betweennesses_nx = add_df_meas(nx.betweenness_centrality(G_nx), 'betweenness_nx') print nc, '\tGet NC closeness' closenesses_nx = add_df_meas(nx.closeness_centrality(G_nx), 'closeness_nx') #print 'Get eigenvector' #eigenvectors_nx = add_df_meas(nx.eigenvector_centrality(G_nx), 'eigenvector_mx') print nc, '\tGet NC constraint' constraints_nx = add_df_meas(nx.constraint(G_nx), 'constraint_nx') df_nx = degrees_nx.merge(clusterings_nx, left_index=True, right_index=True) df_nx = df_nx.merge(pageranks_nx, left_index=True, right_index=True) df_nx = df_nx.merge(betweennesses_nx, left_index=True, right_index=True) df_nx = df_nx.merge(closenesses_nx, left_index=True, right_index=True) df_nx = df_nx.merge(constraints_nx, left_index=True, right_index=True) t2 = time.time() t_nx = t2 - t1 time_nx.append(t_nx) print 'Time for NX: ', round(t_nx, 2), ' s' ''' IGRAPH ''' # get the igraph network t1 = time.time() ftempname = 'tempfile_nc_backboned' + str(nc) ftemp = open(ftempname, 'w') for line in open('networks/backboning/nc_backboned_' + str(nc)): if 'src' not in line: ftemp.write('\t'.join(line.strip().split('\t')[0:3]) + '\n') ftemp.close() G_ig = Graph.Read_Ncol(ftempname, weights=True, directed=False) os.remove(ftempname) # get degree thats matching # nw computes degree centrality, which is the k/(N-1), while ig computes k # https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.algorithms.centrality.degree_centrality.html print '\n', nc, '\tGet IG degrees' degrees_ig = {} G_ig.vs['degree_ig'] = G_ig.degree() N = len(G_ig.vs['degree_ig']) for v in G_ig.vs(): degrees_ig[v['name']] = v['degree_ig'] / float(N - 1) # get the matching clustering # when nw gives 0 for clustering, ig gives nan print nc, '\tGet IG clustering' clusterings_ig = {} G_ig.vs['clustering_ig'] = G_ig.transitivity_local_undirected( weights=None) for v in G_ig.vs(): if np.isnan(v['clustering_ig']): v['clustering_ig'] = 0 clusterings_ig[v['name']] = v['clustering_ig'] # match betweenness # nx gives the normalzed betweenness, while igraph gives the raw value. 
normalization vactor is # Bnorm = = (n*n-3*n+2) / 2.0 http://igraph.org/r/doc/betweenness.html print nc, '\tGet IG betweenness' G_ig.vs['betweenness_ig'] = G_ig.betweenness(weights=None) betweennesses_ig = {} n = len(G_ig.vs()) for v in G_ig.vs(): Bnormalizer = (n * n - 3 * n + 2) / 2.0 betweennesses_ig[v['name']] = v['betweenness_ig'] / Bnormalizer # comparing closeness: # NX: If the graph is not completely connected, this algorithm computes the closeness centrality for each connected part separately. # https://networkx.github.io/documentation/networkx-1.10/reference/generated/networkx.algorithms.centrality.closeness_centrality.html # IG: If the graph is not connected, and there is no path between two vertices, the number of vertices is used instead the length of the geodesic. This is always longer than the longest possible geodesic. # http://igraph.org/python/doc/igraph.GraphBase-class.html#closeness print nc, '\tGet IG closeness' closenesses_ig = {} G_ig.vs['closeness_ig'] = G_ig.closeness(weights=None, normalized=False) for v in G_ig.vs(): closenesses_ig[v['name']] = v['closeness_ig'] # get matching pagerank values # they match, besides some numerical things print nc, '\tGet IG pageranks' pageranks_ig = {} G_ig.vs['pagerank_ig'] = G_ig.pagerank(weights=None) for v in G_ig.vs(): pageranks_ig[v['name']] = v['pagerank_ig'] # constrains match well print nc, '\tGet IG constraint' constraints_ig = {} G_ig.vs['constraint_ig'] = G_ig.constraint(weights=None) for v in G_ig.vs(): constraints_ig[v['name']] = v['constraint_ig'] # G_ig.vs['eigenvector_ig'] = G_ig.eigenvector_centrality( weights = None ) degrees_ig = add_df_meas(degrees_ig, 'degree_ig') clusterings_ig = add_df_meas(clusterings_ig, 'clustering_ig') betweennesses_ig = add_df_meas(betweennesses_ig, 'betweennesse_ig') pageranks_ig = add_df_meas(pageranks_ig, 'pagerank_ig') constraints_ig = add_df_meas(constraints_ig, 'constraint_ig') closenesses_ig = add_df_meas(closenesses_ig, 'closenesse_ig') df_ig = degrees_ig.merge(clusterings_ig, left_index=True, right_index=True) df_ig = df_ig.merge(pageranks_ig, left_index=True, right_index=True) df_ig = df_ig.merge(betweennesses_ig, left_index=True, right_index=True) df_ig = df_ig.merge(closenesses_ig, left_index=True, right_index=True) df_ig = df_ig.merge(constraints_ig, left_index=True, right_index=True) t2 = time.time() t_ig = t2 - t1 time_nx.append(t_ig) print 'Time for IG: ', round(t_ig, 2), ' s\n\n' df_nx.to_csv(folderout + 'nc_backboned_centralities_NX_' + str(nc), na_rep='nan') df_ig.to_csv(folderout + 'nc_backboned_centralities_IG_' + str(nc), na_rep='nan') if compare: compare('degree ', dict(degrees_nx.degree_nx), degrees_ig, GC_nx) compare('clustering', dict(clusterings_nx.clustering_nx), clusterings_ig, GC_nx) compare('pagerank ', dict(pageranks_nx.pagerank_nx), pageranks_ig, GC_nx) compare('betweenness', dict(betweennesses_nx.betweenness_nx), betweennesses_ig, GC_nx) compare('closeness', dict(closenesses_nx.closeness_nx), closenesses_ig, GC_nx) compare('constraint', dict(constraints_nx.constraint_nx), constraints_ig, GC_nx) ftimes.write(str(nc) + '\t' + str(t_nx) + '\t' + str(t_ig) + '\n') ftimes.close()
def betweenness_centrality(self):
    self.betweenness_centrality_dict = nx.betweenness_centrality(self.G)
def get_center(graph: nx.Graph) -> Hashable:
    centralities = nx.betweenness_centrality(graph)
    return max(centralities, key=centralities.get)
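# Minimal usage sketch (not from the original source) for get_center():
# in a five-node path graph the middle node has the highest betweenness.
import networkx as nx

print(get_center(nx.path_graph(5)))  # -> 2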
G = nx.Graph()
G.add_edges_from([(1, 2), (3, 11), (4, 5), (5, 6), (5, 7), (5, 8), (5, 9),
                  (5, 10), (10, 11), (10, 13), (11, 13), (12, 14), (12, 15),
                  (13, 14), (13, 15), (13, 16), (13, 17), (14, 15), (14, 16),
                  (15, 16)])

dict_degree = {}
dict_closeness = {}
dict_beweeness = {}
dict_coreness = {}
# betweenness_centrality and core_number return dicts for the whole graph,
# so compute them once instead of once per node
betweenness_all = nx.betweenness_centrality(G)
coreness_all = nx.core_number(G)
for each in G.nodes():
    dict_degree[each] = G.degree(each)
    dict_closeness[each] = nx.closeness_centrality(G, each)
    dict_beweeness[each] = betweenness_all[each]
    dict_coreness[each] = coreness_all[each]

dict_cascade = {}  # holds cascading power of nodes
for each in G.nodes():
    c = []
    # the cascade is random, so average the number of infected people over 1000 iterations
    for num in range(0, 1000):
        seed = [each]
        i = independentcascade(G, seed)
        c.append(len(i))
    dict_cascade[each] = numpy.average(c)

sorted_dict_cascade = sorted(dict_cascade, key=dict_cascade.get, reverse=True)
sorted_dict_deg = sorted(dict_degree, key=dict_degree.get, reverse=True)
def get_graph_properties(edges): # Set up graph connections = np.array([int(x) for x in edges.split(';')]) nodes = sorted(list(set(connections))) # Calculate Properties properties = [] timings = {} if connections[0] > 0: edges = connections.reshape(int(connections.size / 2), 2) timeS = time.time() # directed graph G = nx.DiGraph() G.add_edges_from(edges) # undirected graph U = nx.Graph() U.add_edges_from(edges) # graph generated # property 1: number of components num_comp = nx.number_connected_components(U) properties.append(num_comp) # property 2: number of strongly connected components num_strong_comp = nx.number_strongly_connected_components(G) properties.append(num_strong_comp) # property 3: average in/out degree indeg = [] outdeg = [] indeg_ls = list(G.in_degree()) outdeg_ls = list(G.out_degree()) for x in np.arange(len(nodes)): indeg.append(indeg_ls[x][1]) outdeg.append(outdeg_ls[x][1]) av_deg = np.mean(indeg) properties.append(av_deg) # property 4: link density linkden = connections.size / (len(nodes) * len(nodes)) properties.append(linkden) # property 5: number of self loops numloop = list(G.selfloop_edges()) numloop = len(numloop) properties.append(numloop) # # property 6: number of simple cycles (excluding self loops) # numcyc = list(nx.simple_cycles(G)) # numcyc = len(numcyc) - numloop # properties.append(numcyc) # timings.update({'p6':time.time()-timeS}) # print('p6') # print(timings['p6']) # timeS = time.time() # find all components components = list(nx.connected_components(U)) ischain = [None] * len(components) istree = [None] * len(components) isdag = [None] * len(components) unicel = [None] * len(components) isscc = [None] * len(components) iscyc = [None] * len(components) iseul = [None] * len(components) indeg_by_comp = [] outdeg_by_comp = [] node_conn = [0] * len(components) av_clust = [0.] * len(components) assort = [0.] * len(components) indeg_cen_av = [0.] * len(components) indeg_cen_max = [0.] * len(components) indeg_cen_min = [0.] * len(components) outdeg_cen_av = [0.] * len(components) outdeg_cen_max = [0.] * len(components) outdeg_cen_min = [0.] * len(components) bet_cen_av = [0.] * len(components) bet_cen_max = [0.] * len(components) bet_cen_min = [0.] * len(components) eig_cen_av = [0.] * len(components) eig_cen_max = [0.] * len(components) eig_cen_min = [0.] * len(components) triangles_av = [0.] * len(components) triangles_max = [0.] * len(components) triangles_min = [0.] * len(components) squares_av = [0.] * len(components) squares_max = [0.] * len(components) squares_min = [0.] * len(components) transitivity = [0.] * len(components) rc = [0.] * len(components) loopnumber = [0] * len(components) for compnum in np.arange(len(components)): # property 6: ischain?(remove self-loops and then test this property) # want: how many chains does the graph contain.. look at each component, not the whole graph in one go. # most graphs are single components. G1 = G.subgraph(list(components[compnum])) Gnoself = G1.copy() Gnoself.remove_edges_from(Gnoself.selfloop_edges()) Unoself = nx.Graph() Unoself.add_edges_from(Gnoself.edges) # if all in and out degrees are 1, graph is a chain..do not include in trees indeg2 = [] outdeg2 = [] indeg_ls2 = list(Gnoself.in_degree()) outdeg_ls2 = list(Gnoself.out_degree()) # nx gives indeg and outdeg as tuples (nodename, in/out deg). 
which is why i need the for loop below for x in np.arange(len(G1.nodes())): indeg2.append(indeg_ls2[x][1]) outdeg2.append(outdeg_ls2[x][1]) indeg_by_comp.append(int_arr_to_str(indeg2, delim=';')) outdeg_by_comp.append(int_arr_to_str(outdeg2, delim=';')) indeg2 = np.array(indeg2) outdeg2 = np.array(outdeg2) in_min_out = indeg2 - outdeg2 ischain[compnum] = int((np.sum(in_min_out) == 0) & (np.sum(np.abs(in_min_out)) == 2) & (np.all(indeg2 <= 1)) & (np.all(outdeg2 <= 1))) # property 7: istree(remove chains first) istree[compnum] = int((nx.is_tree(Gnoself) - ischain[compnum]) > 0) # property 8: isdag(only looking at DAGs other than trees and chains) isdag[compnum] = int((int(nx.is_directed_acyclic_graph(Gnoself)) - istree[compnum] - ischain[compnum]) > 0) if isdag[compnum] > 0: loopnumber[compnum] = len(list( Gnoself.edges)) - (len(list(Gnoself.nodes)) - 1) # property 9: single celled unicel[compnum] = int(len(Gnoself.nodes) == 1) istree[compnum] = int(istree[compnum]) - int( unicel[compnum] ) # nx counts single node with no self-edge as a tree # property 10: isscc (excluding unicellular) num_strong_comp2 = nx.number_strongly_connected_components(Gnoself) isscc[compnum] = int(num_strong_comp2 == 1) isscc[compnum] = int((isscc[compnum] - unicel[compnum]) > 0) # property 11: iscyc(cyclic graphs other than those with a single scc and single celled graphs) iscyc[compnum] = int((isdag[compnum] + istree[compnum] + ischain[compnum] + isscc[compnum] + unicel[compnum]) == 0) # property 12: is eulerian iseul[compnum] = int(nx.is_eulerian(Gnoself)) # property 13: node connectivity node_conn[compnum] = approx.node_connectivity(Gnoself) # property 14: clustering coefficient av_clust[compnum] = nx.average_clustering(Gnoself) # property 15: assortativity(pearson's coefficient) try: assort[compnum] = nx.degree_pearson_correlation_coefficient( Gnoself) #####################check except: assort[compnum] = 0.0 # property 16,17,18: in degree centrality (average, maximum and minimum) indeg_cen = [] dict1 = nx.in_degree_centrality(Gnoself) for a1 in dict1: indeg_cen.append(dict1[a1]) indeg_cen_av[compnum] = np.average(indeg_cen) indeg_cen_max[compnum] = max(indeg_cen) indeg_cen_min[compnum] = min(indeg_cen) # property 19,20,21: out degree centrality (average, maximum, minimum) outdeg_cen = [] dict1 = nx.out_degree_centrality(Gnoself) for a1 in dict1: outdeg_cen.append(dict1[a1]) outdeg_cen_av[compnum] = np.average(outdeg_cen) outdeg_cen_max[compnum] = max(outdeg_cen) outdeg_cen_min[compnum] = min(outdeg_cen) # property 22,23,24: betweenness centrality (average,maximum, minimum) bet_cen = [] dict1 = nx.betweenness_centrality(Gnoself) for a1 in dict1: bet_cen.append(dict1[a1]) bet_cen_av[compnum] = np.average(bet_cen) bet_cen_max[compnum] = max(bet_cen) bet_cen_min[compnum] = min(bet_cen) # property 25,26,27: eigen vector centrality (average,maximum, minimum) eig_cen = [] try: dict1 = nx.eigenvector_centrality(Gnoself) for a1 in dict1: eig_cen.append(dict1[a1]) eig_cen_av[compnum] = np.average(eig_cen) eig_cen_max[compnum] = max(eig_cen) eig_cen_min[compnum] = min(eig_cen) except nx.PowerIterationFailedConvergence: pass # property 28,29,30: number of triangles for each node (average,maximum, minimum) triangles = [] dict1 = nx.triangles(Unoself) for a1 in dict1: triangles.append(dict1[a1]) if len(triangles): triangles_av[compnum] = np.average(triangles) triangles_max[compnum] = max(triangles) triangles_min[compnum] = min(triangles) # property 31: transitivity (fraction of all possible triangles present in the graph) 
transitivity[compnum] = nx.transitivity(Gnoself) # property 32,33,34: square clustering for each node(fraction of all possible squares present at a node) squares = [] dict1 = nx.square_clustering(Gnoself) for a1 in dict1: squares.append(dict1[a1]) if len(squares): squares_av[compnum] = np.average(squares) squares_max[compnum] = max(squares) squares_min[compnum] = min(squares) # propery 35: rich club coefficient if len(list(Unoself.nodes())) > 3: rc[compnum] = 0.0 # rc[compnum] = nx.rich_club_coefficient(Unoself).values()# only works if graph has 4 or more edges # property 36 and 37: number of source and target nodes iseul = sum(iseul) iscyc = sum(iscyc) isscc = sum(isscc) unicel = sum(unicel) isdag = sum(isdag) istree = sum(istree) ischain = sum(ischain) indeg_by_comp = ';'.join([str(x) for x in indeg_by_comp]) outdeg_by_comp = ';'.join([str(x) for x in outdeg_by_comp]) node_conn = ';'.join([str(x) for x in node_conn ]) # node connectivity for each component avav_clust = np.average( av_clust) # average clustering coefficient over all components av_clust = ';'.join([ str(round(x, 2)) for x in av_clust ]) # average clustering coefficients for each component av_assort = np.average( assort) # average assortativity over all components assort = ';'.join([str(round(x, 2)) for x in assort ]) # assortativity for each component indeg_cen_avav = np.average( indeg_cen_av) # average indeg centrality over all components indeg_cen_av = ';'.join([ str(round(x, 2)) for x in indeg_cen_av ]) # average indeg centrality for each component indeg_cen_maxmax = max( indeg_cen_max) # maximum indeg centrality across all components indeg_cen_max = ';'.join([ str(round(x, 2)) for x in indeg_cen_max ]) # maximum indeg centrality for each component indeg_cen_minmin = min( indeg_cen_min) # minimum indeg centrality across all components indeg_cen_min = ';'.join([ str(round(x, 2)) for x in indeg_cen_min ]) # minimum indeg centrality for each component outdeg_cen_avav = np.average(outdeg_cen_av) outdeg_cen_av = ';'.join([str(round(x, 2)) for x in outdeg_cen_av]) outdeg_cen_maxmax = max(outdeg_cen_max) outdeg_cen_max = ';'.join([str(round(x, 2)) for x in outdeg_cen_max]) outdeg_cen_minmin = min(outdeg_cen_min) outdeg_cen_min = ';'.join([str(round(x, 2)) for x in outdeg_cen_min]) bet_cen_avav = np.average(bet_cen_av) bet_cen_av = ';'.join([str(round(x, 2)) for x in bet_cen_av]) bet_cen_maxmax = max(bet_cen_max) bet_cen_max = ';'.join([str(round(x, 2)) for x in bet_cen_max]) bet_cen_minmin = min(bet_cen_min) bet_cen_min = ';'.join([str(round(x, 2)) for x in bet_cen_min]) eig_cen_avav = np.average(eig_cen_av) eig_cen_av = ';'.join([str(round(x, 2)) for x in eig_cen_av]) eig_cen_maxmax = max(eig_cen_max) eig_cen_max = ';'.join([str(round(x, 2)) for x in eig_cen_max]) eig_cen_minmin = min(eig_cen_min) eig_cen_min = ';'.join([str(round(x, 2)) for x in eig_cen_min]) triangles_avav = np.average(triangles_av) triangles_av = ';'.join([str(x) for x in triangles_av]) triangles_maxmax = max(triangles_max) triangles_max = ';'.join([str(x) for x in triangles_max]) triangles_minmin = min(triangles_min) triangles_min = ';'.join([str(x) for x in triangles_min]) transitivity_av = np.average(transitivity) transitivity_max = max(transitivity) transitivity_min = min(transitivity) transitivity = ';'.join([str(x) for x in transitivity]) squares_avav = np.average(squares_av) squares_maxmax = max(squares_max) squares_minmin = min(squares_min) squares_av = ';'.join([str(x) for x in squares_av]) squares_max = ';'.join([str(x) for x in squares_max]) 
squares_min = ';'.join([str(x) for x in squares_min]) rc_av = np.average(rc) rc_max = max(rc) rc_min = min(rc) rc = ';'.join([str(x) for x in rc]) ln = [loopnumber[x] for x in np.nonzero(loopnumber)[0]] if any(ln): loopnumber_av = np.average(ln) else: loopnumber_av = 0.0 loopnumber = ';'.join([str(x) for x in loopnumber]) # check.. sum of iscyc, isscc, unicel, dag,tree, chain should be the total number of components if num_comp != (iscyc + isscc + unicel + isdag + istree + ischain): print('Number of components is wrong!!!!!!') print(num_comp) print([iscyc, isscc, unicel, isdag, istree, ischain]) sys.exit() properties.append(indeg_by_comp) # string properties.append(outdeg_by_comp) #string properties.append(ischain) #int properties.append(istree) #int properties.append(isdag) #int properties.append(unicel) #int properties.append(isscc) #int properties.append(iscyc) #int properties.append(iseul) #int properties.append(loopnumber_av) #float properties.append(loopnumber) #string properties.append(node_conn) #string properties.append(avav_clust) #float properties.append(av_clust) #string properties.append(av_assort) #float properties.append(assort) #string properties.append(indeg_cen_avav) #float properties.append(indeg_cen_av) #string properties.append(indeg_cen_maxmax) #float properties.append(indeg_cen_max) #string properties.append(indeg_cen_minmin) #float properties.append(indeg_cen_min) #string properties.append(outdeg_cen_avav) #float properties.append(outdeg_cen_av) #string properties.append(outdeg_cen_maxmax) #float properties.append(outdeg_cen_max) #string properties.append(outdeg_cen_minmin) #float properties.append(outdeg_cen_min) #string properties.append(bet_cen_avav) #float properties.append(bet_cen_av) #string properties.append(bet_cen_maxmax) #float properties.append(bet_cen_max) #string properties.append(bet_cen_minmin) #float properties.append(bet_cen_min) #string properties.append(eig_cen_avav) #float properties.append(eig_cen_av) #string properties.append(eig_cen_maxmax) #float properties.append(eig_cen_max) #string properties.append(eig_cen_minmin) #float properties.append(eig_cen_min) #string properties.append(triangles_avav) #float properties.append(triangles_av) #string properties.append(triangles_maxmax) #float properties.append(triangles_max) #string properties.append(triangles_minmin) #float properties.append(triangles_min) #string properties.append(transitivity_av) # float properties.append(transitivity_max) #float properties.append(transitivity_min) #float properties.append(transitivity) #string properties.append(squares_avav) #float properties.append(squares_av) #string properties.append(squares_maxmax) #float properties.append(squares_max) #string properties.append(squares_minmin) #float properties.append(squares_min) #string properties.append(rc_av) # float properties.append(rc_max) #float properties.append(rc_min) #float properties.append(rc) #string # append more properties..... # property 14: # property x: in-degree sequence #indeg = # list(G.in_degree())[iterate over number of nodes][1] # property y: out-degree sequence #outdeg = # list(G.in_degree())[iterate over number of nodes][1] #..... else: properties = [0] * 2 + [0.] * 2 + [0] + [''] * 2 + [0] * 7 + [ 0. ] + [''] * 2 + [0., ''] * 17 + [0.] * 3 + [''] + [0., ''] * 3 + [ 0., 0., 0., '' ] # return list of properties return properties
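# Illustrative sketch (not part of the function above) of the same classification
# idea applied to one self-loop-free component: a directed path is a "chain", an
# acyclic graph whose undirected skeleton is a tree but which is not a chain is a
# "tree", any other acyclic graph is a "DAG", and a graph forming one strongly
# connected component is an "SCC". Function and labels here are hypothetical.
import networkx as nx

def classify_component(Gnoself):
    und = Gnoself.to_undirected()
    indeg = [d for _, d in Gnoself.in_degree()]
    outdeg = [d for _, d in Gnoself.out_degree()]
    is_chain = (nx.is_directed_acyclic_graph(Gnoself)
                and max(indeg) <= 1 and max(outdeg) <= 1
                and nx.is_tree(und))
    if Gnoself.number_of_nodes() == 1:
        return 'unicellular'
    if is_chain:
        return 'chain'
    if nx.is_tree(und):
        return 'tree'
    if nx.is_directed_acyclic_graph(Gnoself):
        return 'dag'
    if nx.number_strongly_connected_components(Gnoself) == 1:
        return 'scc'
    return 'cyclic'

print(classify_component(nx.DiGraph([(0, 1), (1, 2)])))  # chain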
time.sleep(10)

########## CENTRALITY MEASURES ##########
print(colored('\n \n ########## CENTRALITY MEASURES ##########', 'red'))
time.sleep(3)

# Degree centrality (the number of connections a node has to all other nodes)
print(colored('\n \n DEGREE CENTRALITY \n', 'red'))
time.sleep(3)
print(net.degree_centrality(g))
time.sleep(0.5)

# Eigenvector centrality (how important a node is as a function of how well connected it is)
print(colored('\n \n EIGENVECTOR CENTRALITY \n', 'red'))
time.sleep(3)
print(net.eigenvector_centrality(g))
time.sleep(0.5)

# Closeness centrality (importance of a node as a function of its proximity to the rest of the network)
print(colored('\n \n CLOSENESS CENTRALITY \n', 'red'))
time.sleep(3)
print(net.closeness_centrality(g))
time.sleep(0.5)

# Betweenness centrality (counts how many times a node appears on the shortest paths between two nodes)
print(colored('\n \n BETWEENNESS CENTRALITY \n', 'red'))
time.sleep(3)
print(net.betweenness_centrality(g))
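# A minimal sketch (not in the original script), reusing the `net` alias and the
# graph `g` from above: printing only the top-5 nodes per measure keeps the
# output readable on larger graphs.
for name, scores in [('degree', net.degree_centrality(g)),
                     ('closeness', net.closeness_centrality(g)),
                     ('betweenness', net.betweenness_centrality(g))]:
    top5 = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:5]
    print(colored('\n TOP 5 BY ' + name.upper(), 'red'))
    print(top5)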
def graphAnalysis(): if not request.headers['tga-key'] or request.headers['tga-key'] != tga_key: return jsonify({ 'message': 'Not authorized for twina graph api.' }) body = request.get_json() graph = http_client.get(fb_db_base_url + body['graph_path'] + ".json") graph = graph.json() G = nx.Graph() G.add_nodes_from([(screen_name, graph['nodes'][screen_name]) for screen_name in graph['nodes']]) G.add_edges_from([ ( graph['edges'][source_target]['source'], graph['edges'][source_target]['target'], graph['edges'][source_target] ) for source_target in graph['edges'] ]) analysis = { # Single-Result 'number_of_nodes': G.number_of_nodes(), 'number_of_edges': G.number_of_edges(), 'average_clustering': nx.average_clustering(G), # Nodes Analysis 'clustering': nx.clustering(G), 'square_clustering': nx.square_clustering(G), 'degree_centrality': nx.degree_centrality(G), 'closeness_centrality': nx.closeness_centrality(G), 'betweenness_centrality': nx.betweenness_centrality(G), } for nodes_analysis in [ 'clustering', 'square_clustering', 'degree_centrality', 'closeness_centrality', 'betweenness_centrality' ]: print(analysis[nodes_analysis].keys()) for node in analysis[nodes_analysis].keys(): if 'analysis' not in graph['nodes'][node].keys(): graph['nodes'][node]['analysis'] = {} graph['nodes'][node]['analysis'][nodes_analysis] = analysis[nodes_analysis][node] try: # post analysis http_client.put(fb_db_base_url + body['analysis_path'] + ".json", data=json.dumps(analysis)) # modify graph with analysis http_client.put(fb_db_base_url + body['graph_path'] + ".json", data=json.dumps({ 'nodes': graph['nodes'], 'edges': graph['edges'] })) except Exception as e: current_app.logger.error('Failed to post analysis: ' + str(e)) return jsonify({ 'message': 'Graph analyzed', # 'data': analysis, })
def computeComponent(size):
    randomCompMean = []
    degreeCompMean = []
    closenessCompMean = []
    betweennessCompMean = []
    for r in range(1, 2):
        print("step: ", r)
        graph = nx.erdos_renyi_graph(1000, 0.1)
        graph1 = graph.copy()
        graph2 = graph.copy()
        graph3 = graph.copy()

        # Remove high-degree nodes, create a list of connected_components lengths (attack)
        listDegree = [
            x[0] for x in sorted(
                dict(graph.degree()).items(), reverse=True, key=lambda x: x[1])
        ]
        degreeCompMean.append(connectedComponentsList(graph, listDegree, size))
        print("degree")

        # Remove randomly sampled nodes, create a list of connected_components lengths
        listRandomNodes = random.sample(list(graph1.nodes()), len(graph1.nodes()))
        randomCompMean.append(
            connectedComponentsList(graph1, listRandomNodes, size))
        print("random")

        # Remove high closeness_centrality nodes, create a list of connected_components lengths
        listClosenessCentrality = [
            x[0] for x in sorted(nx.closeness_centrality(graph2).items(),
                                 reverse=True, key=lambda x: x[1])
        ]
        closenessCompMean.append(
            connectedComponentsList(graph2, listClosenessCentrality, size))

        # Remove high betweenness_centrality nodes, create a list of connected_components lengths
        listBetweennessCentrality = [
            x[0] for x in sorted(nx.betweenness_centrality(graph3).items(),
                                 reverse=True, key=lambda x: x[1])
        ]
        betweennessCompMean.append(
            connectedComponentsList(graph3, listBetweennessCentrality, size))
        # print("bet")

    degreeCompMean = computeMean(degreeCompMean)
    randomCompMean = computeMean(randomCompMean)
    closenessCompMean = computeMean(closenessCompMean)
    betweennessCompMean = computeMean(betweennessCompMean)

    # plotting:
    x = [x * 0.01 for x in range(1, 100)]
    print("x: ", x)
    degree, = plt.plot(x, degreeCompMean, label="degree (attack)", color='b')
    randoms, = plt.plot(x, randomCompMean, label="random", color='y')
    closeness, = plt.plot(x, closenessCompMean, label="closeness", color='g')
    betweenness, = plt.plot(x, betweennessCompMean, label="betweenness", color='r')

    # drawing legend and titles:
    legend = plt.legend(bbox_to_anchor=(0.96, 0.94), loc="upper right", borderaxespad=0.)
    plt.gca().add_artist(legend)
    # plt.title("Robustness of networks" + "\n" + "Watts, N = 1000, k = 8")
    plt.xlabel("Removed nodes")
    plt.ylabel("connected components coefficient")
    plt.savefig("ER_robustness1.jpg")
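# The helper connectedComponentsList() used above is not shown in this snippet.
# A minimal sketch of what it is assumed to do: remove nodes in the given order
# and record the relative size of the largest connected component after every
# `size` removals. Its real signature and semantics in the original code may differ.
import networkx as nx

def connectedComponentsList(graph, ordered_nodes, size):
    n = graph.number_of_nodes()
    fractions = []
    for i, node in enumerate(ordered_nodes, start=1):
        graph.remove_node(node)
        if i % size == 0 and graph.number_of_nodes() > 0:
            largest = max(nx.connected_components(graph), key=len)
            fractions.append(len(largest) / n)
    return fractions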
def complex_network_mapping(graph):
    """
    Compute the vectorial mapping of a graph based on the computation
    of several complex-network analysis indexes.
    """
    vect = []
    n = nx.number_of_nodes(graph)
    e = nx.number_of_edges(graph)
    print(n, e)

    # adj = nx.adjacency_matrix(graph).toarray()
    # adj_bin = np.where(adj > 0, 1., 0.)
    # adj_conn = 1 - adj
    adj_bin = nx.adjacency_matrix(graph).toarray()
    adj_bin = np.array(adj_bin, dtype=float)

    # Node betweenness (binary)
    bt_bin = list(nx.betweenness_centrality(graph).values())
    avg_btb = np.mean(bt_bin)
    vect.append(avg_btb)

    # Edge betweenness
    ebt = np.array(list(nx.edge_betweenness_centrality(graph).values()))
    vect.append(np.mean(ebt))

    # Eigenvector centrality (binary)
    evc_bin = eigenvector_centrality_und(adj_bin)
    avg_evcb = np.mean(evc_bin)
    vect.append(avg_evcb)

    # Flow coefficient
    _, flow_bin, _ = flow_coef_bd(adj_bin)
    avg_flow = np.mean(flow_bin)
    vect.append(avg_flow)

    # K-coreness centrality
    kcor_bin, _ = kcoreness_centrality_bu(adj_bin)
    avg_kcor = np.mean(kcor_bin)
    vect.append(avg_kcor)

    # Degree assortativity
    dac = nx.degree_assortativity_coefficient(graph)
    vect.append(dac)

    # Page rank centrality
    # pgr_wei = pagerank_centrality(adj_bin, d=0.85)
    # avg_pgr = np.mean(pgr_wei)
    # vect.append(avg_pgr)

    # Rich club coefficient
    # rcc = nx.rich_club_coefficient(graph).values()
    # avg_rcc = np.mean(rcc)
    # vect.append(avg_rcc)

    # Transitivity
    tr = nx.transitivity(graph)
    vect.append(tr)

    # Average clustering
    avg_clst = nx.average_clustering(graph)
    vect.append(avg_clst)

    # Global efficiency (binary)
    glb_ef = efficiency_bin(adj_bin)
    vect.append(glb_ef)

    return vect
# Write out initial graph data in JSON file
jsonData = json_graph.node_link_data(M)
with open('evo_0.json', 'w') as outfile:
    json.dump(jsonData, outfile, indent=4)

# Eigenvector centrality criteria (normalised by the maximum value)
Meigen = nx.eigenvector_centrality(M)
normeigen = [float(i) / max(Meigen.values()) for i in Meigen.values()]

# Closeness centrality
Mclose = nx.closeness_centrality(M)
normclose = list(Mclose.values())

# Betweenness centrality
Mbetween = nx.betweenness_centrality(M)
normbetween = list(Mbetween.values())

N = len(M.nodes())
labels = [i[1]['name'] for i in M.nodes(data=True)]

# ###################### Evolution ####################
import operator

# Common Neighbors
CN = [(e[0], e[1], len(list(nx.common_neighbors(M, e[0], e[1]))))
      for e in nx.non_edges(M)]
CN.sort(key=operator.itemgetter(2), reverse=True)

# Jaccard coef
# In networkx 2.x, degree() returns a DegreeView, so convert to a dict first
nodes = list(dict(G1.degree()).values())
plt.hist(nodes, bins=25)
plt.xlim(0, 200)
plt.show()

Counter(nx.degree_centrality(G)).most_common(5)

len(list(nx.connected_components(G1)))
size = [len(c) for c in nx.connected_components(G1)]
plt.hist(size[1:])

G2 = nx.read_edgelist('data/small_actor_edges.tsv', delimiter='\t')
len(list(nx.connected_components(G2)))
Counter(nx.degree_centrality(G2)).most_common(5)
Counter(nx.betweenness_centrality(G2)).most_common(5)

karateG = nx.karate_club_graph()
# betweenness = nx.edge_betweenness_centrality(karateG)
# u, v = sorted(betweenness.items(), key=lambda x: x[1])[-1][0]
# karateG.remove_edge(u, v)
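# A minimal illustration (not from the original notebook) of the idea behind the
# commented-out lines above: repeatedly removing the edge with the highest edge
# betweenness (Girvan-Newman style) eventually splits karateG into communities.
while nx.number_connected_components(karateG) < 2:
    betweenness = nx.edge_betweenness_centrality(karateG)
    u, v = max(betweenness, key=betweenness.get)
    karateG.remove_edge(u, v)
print(sorted(len(c) for c in nx.connected_components(karateG)))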
def set_up_hash_distr(net_p2p, centrality_measure, hash_distribution,
                      number_selfish_nodes, number_honest_nodes, alpha):
    # make sure that alpha is 0 whenever there are no selfish nodes
    # (in case you want to simulate only honest nodes)
    assert not (number_selfish_nodes == 0 and alpha != 0), "Alpha unequal 0 with no selfish nodes"

    if hash_distribution == "UNIFORM":
        hashing_power_selfish = np.random.random(number_selfish_nodes)
        hashing_power_honest = np.random.random(number_honest_nodes)
    elif hash_distribution == "POWERLAW":
        power_distrib = pl.Power_Law(parameters=[pl_alpha], discrete=False)
        hashing_power_selfish = power_distrib.generate_random(number_selfish_nodes)
        hashing_power_honest = power_distrib.generate_random(number_honest_nodes)
    elif hash_distribution == "EXPONENTIAL":
        exp_distrib = pl.Exponential(parameters=[exp_lambda])
        hashing_power_selfish = exp_distrib.generate_random(number_selfish_nodes)
        hashing_power_honest = exp_distrib.generate_random(number_honest_nodes)

    # normalize the vectors so that the selfish hashing power sums to alpha
    # and the honest hashing power sums to 1 - alpha
    if number_selfish_nodes != 0:
        hashing_power_selfish /= sum(hashing_power_selfish)
        hashing_power_selfish *= alpha
    hashing_power_honest /= sum(hashing_power_honest) / (1 - alpha)

    # combine selfish and honest hashing power vectors
    hashing_power_unsorted = np.append(hashing_power_selfish, hashing_power_honest)

    if centrality_measure == "RANDOM":
        # create an is_selfish vector that corresponds to the order of the hashing_power vector
        is_selfish = np.append(np.ones(number_selfish_nodes),
                               np.zeros(number_honest_nodes))
        # finally, shuffle is_selfish and hashing_power arrays in unison
        randomize = np.arange(len(hashing_power_unsorted))
        np.random.shuffle(randomize)
        hashing_power = hashing_power_unsorted[randomize]
        is_selfish = is_selfish[randomize]

    elif centrality_measure == "BETWEENNESS":
        # compute betweenness centrality and sort it in descending order
        btwn = nx.betweenness_centrality(net_p2p)
        btwn_sorted = {k: v for k, v in sorted(
            btwn.items(), key=lambda item: item[1], reverse=True)}
        # node indices sorted by betweenness centrality
        btwn_sorted_indices = list(btwn_sorted.keys())
        selfish_indices = list(btwn_sorted.keys())[:number_selfish_nodes]
        honest_indices = list(btwn_sorted.keys())[number_selfish_nodes:len(btwn)]

        # mark selfish nodes according to betweenness centrality
        is_selfish = np.zeros(number_honest_nodes + number_selfish_nodes)
        for i in selfish_indices:
            is_selfish[i] = 1

        # reorder the hashing power vector so that selfish nodes are assigned the correct hashing power
        hashing_power = hashing_power_unsorted.copy()
        for (index, value) in enumerate(btwn_sorted):
            hashing_power[value] = hashing_power_unsorted[index]

    return hashing_power, is_selfish
def function(): mapbox_access_token = 'pk.eyJ1IjoiY2xlaXR1cyIsImEiOiJjamgwZ2c1a3Yxc3dtMnFtb2ptdDR5ZWs0In0.sjZdn45v32AojmWGWIN9Tg' pt.set_credentials_file(username='******', api_key='9LICBZ681YiPTiSZCuFX') # ########################### Reading Initial Data ################################### with open('fb_nodes.json') as f: nodes = json.load(f) with open('fb_edges.json') as f: links = json.load(f) for i in links: i['value'] = 'init' # ########################### Reading Initial Data ################################### #nodes = data['nodes'] #links = data['edges'] M = nx.Graph() M = nx.Graph( [(i['source'], i['target'], {'value': i['value']}) for i in links]) for i in range(len(M.nodes)): node = nodes[i]['id'] M.add_node(node, group=nodes[i]['group']) M.add_node(node, name=nodes[i]['name']) M.add_node(node, istrain=nodes[i]['istrain']) M.add_node(node, lat=nodes[i]['lat']) M.add_node(node, lon=nodes[i]['lon']) M.add_node(node, id=nodes[i]['id']) # ###################### Evolution #################### # Common Neighbors CN = [(e[0], e[1], len(list(nx.common_neighbors(M, e[0], e[1])))) for e in nx.non_edges(M)] CN.sort(key=operator.itemgetter(2), reverse=True) # Jaccard coef jaccard = list(nx.jaccard_coefficient(M)) jaccard.sort(key=operator.itemgetter(2), reverse=True) # Resource Allocation index RA = list(nx.resource_allocation_index(M)) RA.sort(key=operator.itemgetter(2), reverse=True) # Adamic-Adar index AA = list(nx.adamic_adar_index(M)) AA.sort(key=operator.itemgetter(2), reverse=True) # Preferential Attachement PA = list(nx.preferential_attachment(M)) PA.sort(key=operator.itemgetter(2), reverse=True) # ###################### Prediction on Future Edge Linkage #################### FM = M for i in PA[0:int(0.1*len(M.edges()))]: FM.add_edge(i[0], i[1], value='new') for i in CN[0:int(0.1*len(M.edges()))]: FM.add_edge(i[0], i[1], value='new') #Layout pos=nx.fruchterman_reingold_layout(FM, dim=3) lay=list() for i in pos.values(): lay.append(list(i)) N = len(FM.nodes()) ulti = {} for i in pos.keys(): ulti[i]=list(pos[i]) #Eigenvector centrality criteria (normalised) Geigen=nx.eigenvector_centrality(FM) for i in Geigen: ulti[i].append(float(Geigen[i])/max(Geigen.values())) #Closeness centrality Gclose=nx.closeness_centrality(FM) for i in Gclose: ulti[i].append(Gclose[i]) #Betweeness centrality Gbetween=nx.betweenness_centrality(FM) for i in Gbetween: ulti[i].append(Gbetween[i]) # ###################### Plot #################### # Nodes and Edges coordinates Xv=[lay[k][0] for k in range(N)]# x-coordinates of nodes Yv=[lay[k][1] for k in range(N)]# y-coordinates Zv=[lay[k][2] for k in range(N)]# z-coordinates Xed = [] Yed = [] Zed = [] Xned = [] Yned = [] Zned = [] for edge in M.edges(): Xed+=[pos[edge[0]][0],pos[edge[1]][0], None] Yed+=[pos[edge[0]][1],pos[edge[1]][1], None] Zed+=[pos[edge[0]][2],pos[edge[1]][2], None] for edge in [(i[0], i[1]) for i in list(FM.edges(data=True)) if i[2]['value'] == 'new']: Xned+=[pos[edge[0]][0],pos[edge[1]][0], None] Yned+=[pos[edge[0]][1],pos[edge[1]][1], None] Zned+=[pos[edge[0]][2],pos[edge[1]][2], None] trace1=Scatter3d(x=Xed, y=Yed, z=Zed, mode='lines', line=Line(color='rgb(125,125,125)', width=1), hoverinfo='none' ) trace2=Scatter3d(x=Xv, y=Yv, z=Zv, mode='markers', name='actors', marker=Marker(symbol='dot', color=[i[-3] for i in ulti.values()], # Eigenvector centrality #color=[i[-2] for i in ulti.values()], # Closeness centrality #color=[i[-1] for i in ulti.values()], # Betweeness centrality #color=[data['nodes'][k]['group'] for k in 
range(len(data['nodes']))], # size=6,colorbar=ColorBar( title='' ), colorscale='Viridis', line=Line(color='rgb(158,18,130)', width=0.5) ), text=ulti.keys(), # node Labels hoverinfo='text' ) data=Data([trace1, trace2]) py.plot(data, filename = 'fb-3d') return
import pandas as pd
import networkx as nx
import numpy as np
from bokeh.palettes import YlOrRd

df = pd.read_csv(
    'C:/Users/Meenu/PycharmProjects/CS590/CS590-Yelp/usernetwork1.csv')
df['distance'] = 1 / df['strength']
df_user = pd.read_csv(
    'C:/Users/Meenu/PycharmProjects/CS590/CS590-Yelp/userdetails1.csv')
del df_user['Unnamed: 0']

G = nx.from_pandas_edgelist(df, 'user1', 'user2', ['strength', 'distance'])
print(nx.number_connected_components(G))

nx.set_node_attributes(G, df_user.set_index('user_id').to_dict('index'))
nx.set_node_attributes(G, dict(G.degree(weight='strength')), 'WDegree')
nx.set_node_attributes(G, nx.betweenness_centrality(G, weight='distance'), 'bwcentral')
nx.set_node_attributes(G, nx.communicability_betweenness_centrality(G), 'ccentral')

# col = ['#FFFFFF', '#93CCB9', '#4D9980', '#24745A', '#074A34', '#002217']
col = YlOrRd[8]
# G.node was removed in networkx 2.4; use G.nodes for node attribute access
for u in G.nodes():
    if G.nodes[u]['friend'] < 730:
        G.nodes[u]['friend'] = col[7]
    elif G.nodes[u]['friend'] < (730 * 2):
        G.nodes[u]['friend'] = col[6]
    elif G.nodes[u]['friend'] < (730 * 3):
        G.nodes[u]['friend'] = col[5]
    elif G.nodes[u]['friend'] < (730 * 4):
def Between_Centrality(G):
    Bet_Centrality = nx.betweenness_centrality(G)
    # print("Bet_Centrality:", sorted(Bet_Centrality.items(), key=lambda d: d[1], reverse=True))
    return Bet_Centrality
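# Hedged usage sketch (not in the original source): Between_Centrality just
# wraps nx.betweenness_centrality, so its result can be ranked directly.
# The karate club graph here is an arbitrary example.
import networkx as nx

demo_graph = nx.karate_club_graph()
scores = Between_Centrality(demo_graph)
for node, score in sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:3]:
    print(node, round(score, 3))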
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

data = pd.read_csv('erl_14_8_084011_sd_3.csv')
G = nx.from_pandas_edgelist(df=data, source='ori', target='des',
                            edge_attr='total', create_using=nx.DiGraph())

connectivity = list(G.degree())
connectivity_values = [n[1] for n in connectivity]
# convert the dict view to a list so matplotlib can plot it
centrality = list(nx.betweenness_centrality(G).values())

plt.figure(figsize=(12, 8))
plt.plot(centrality, connectivity_values, 'ro')
plt.xlabel('Node centrality', fontsize='large')
plt.ylabel('Node connectivity', fontsize='large')
plt.savefig("node_connectivity.png", dpi=300)
plt.show()

# Get 95th percentile of largest flows
threshold = np.percentile(data['total'], 95)
data = data.loc[(data['total'] > threshold)]

pos_data = pd.read_csv('counties.csv', delimiter=',')
G = nx.from_pandas_edgelist(df=data, source='ori',
def centrality_fun(graph, feature_dim):
    nodes = list(graph.G.nodes)
    centrality = nx.betweenness_centrality(graph.G)
    graph.betweenness_centrality = torch.tensor(
        [centrality[x] for x in nodes]).unsqueeze(1)
    return graph
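# Hedged usage sketch: centrality_fun expects a wrapper object exposing the
# underlying networkx graph as `.G` (as in the snippet above). The SimpleNamespace
# below is a stand-in for whatever graph container the original pipeline uses;
# feature_dim is unused by centrality_fun itself.
import types
import torch
import networkx as nx

wrapper = types.SimpleNamespace(G=nx.karate_club_graph())
wrapper = centrality_fun(wrapper, feature_dim=1)
print(wrapper.betweenness_centrality.shape)  # torch.Size([34, 1])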
def a_avg_between(G):
    # wrap the dict view in a list so np.average receives a proper sequence
    return np.average(list(nx.betweenness_centrality(G, normalized=True).values()))
for i in nodes: if i[1] == 1: # hateful node for j in nx_graph.neighbors(i[0]): hateful_neighbors[j] = True if i[1] == 0: for j in nx_graph.neighbors(i[0]): normal_neighbors[j] = True nx.set_node_attributes(nx_graph, name="hateful_neighbors", values=False) nx.set_node_attributes(nx_graph, name="hateful_neighbors", values=hateful_neighbors) nx.set_node_attributes(nx_graph, name="normal_neighbors", values=False) nx.set_node_attributes(nx_graph, name="normal_neighbors", values=normal_neighbors) # Set node network-based attributes, such as betweenness and eigenvector betweenness = nx.betweenness_centrality(nx_graph, k=16258, normalized=False) eigenvector = nx.eigenvector_centrality(nx_graph) in_degree = nx.in_degree_centrality(nx_graph) out_degree = nx.out_degree_centrality(nx_graph) nx.set_node_attributes(nx_graph, name="betweenness", values=betweenness) nx.set_node_attributes(nx_graph, name="eigenvector", values=eigenvector) nx.set_node_attributes(nx_graph, name="in_degree", values=in_degree) nx.set_node_attributes(nx_graph, name="out_degree", values=out_degree) nx.write_graphml(nx_graph, "../data/features/users_hate.graphml")
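# Side note (illustrative, not from the original script): the k argument used
# above makes networkx estimate betweenness from a sample of k source nodes
# instead of all of them, which is what keeps the computation feasible on a
# large user graph. A minimal sketch of the trade-off on a smaller random graph:
import networkx as nx

G_demo = nx.gnm_random_graph(2000, 8000, seed=42)
exact = nx.betweenness_centrality(G_demo)
approx = nx.betweenness_centrality(G_demo, k=200, seed=42)
top_exact = sorted(exact, key=exact.get, reverse=True)[:10]
top_approx = sorted(approx, key=approx.get, reverse=True)[:10]
print(len(set(top_exact) & set(top_approx)))  # overlap of the two top-10 rankings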
def extended_stats(G, connectivity=False, anc=False, ecc=False, bc=False, cc=False): """ Calculate extended topological stats and metrics for a graph. Many of these algorithms have an inherently high time complexity. Global topological analysis of large complex networks is extremely time consuming and may exhaust computer memory. Consider using function arguments to not run metrics that require computation of a full matrix of paths if they will not be needed. Parameters ---------- G : networkx multidigraph connectivity : bool if True, calculate node and edge connectivity anc : bool if True, calculate average node connectivity ecc : bool if True, calculate shortest paths, eccentricity, and topological metrics that use eccentricity bc : bool if True, calculate node betweenness centrality cc : bool if True, calculate node closeness centrality Returns ------- stats : dict dictionary of network measures containing the following elements (some only calculated/returned optionally, based on passed parameters): - avg_neighbor_degree - avg_neighbor_degree_avg - avg_weighted_neighbor_degree - avg_weighted_neighbor_degree_avg - degree_centrality - degree_centrality_avg - clustering_coefficient - clustering_coefficient_avg - clustering_coefficient_weighted - clustering_coefficient_weighted_avg - pagerank - pagerank_max_node - pagerank_max - pagerank_min_node - pagerank_min - node_connectivity - node_connectivity_avg - edge_connectivity - eccentricity - diameter - radius - center - periphery - closeness_centrality - closeness_centrality_avg - betweenness_centrality - betweenness_centrality_avg """ stats = {} full_start_time = time.time() # create a DiGraph from the MultiDiGraph, for those metrics that require it G_dir = nx.DiGraph(G) # create an undirected Graph from the MultiDiGraph, for those metrics that # require it G_undir = nx.Graph(G) # get the largest strongly connected component, for those metrics that # require strongly connected graphs G_strong = get_largest_component(G, strongly=True) # average degree of the neighborhood of each node, and average for the graph avg_neighbor_degree = nx.average_neighbor_degree(G) stats['avg_neighbor_degree'] = avg_neighbor_degree stats['avg_neighbor_degree_avg'] = sum( avg_neighbor_degree.values()) / len(avg_neighbor_degree) # average weighted degree of the neighborhood of each node, and average for # the graph avg_weighted_neighbor_degree = nx.average_neighbor_degree(G, weight='length') stats['avg_weighted_neighbor_degree'] = avg_weighted_neighbor_degree stats['avg_weighted_neighbor_degree_avg'] = sum( avg_weighted_neighbor_degree.values()) / len( avg_weighted_neighbor_degree) # degree centrality for a node is the fraction of nodes it is connected to degree_centrality = nx.degree_centrality(G) stats['degree_centrality'] = degree_centrality stats['degree_centrality_avg'] = sum( degree_centrality.values()) / len(degree_centrality) # calculate clustering coefficient for the nodes stats['clustering_coefficient'] = nx.clustering(G_undir) # average clustering coefficient for the graph stats['clustering_coefficient_avg'] = nx.average_clustering(G_undir) # calculate weighted clustering coefficient for the nodes stats['clustering_coefficient_weighted'] = nx.clustering(G_undir, weight='length') # average clustering coefficient (weighted) for the graph stats['clustering_coefficient_weighted_avg'] = nx.average_clustering( G_undir, weight='length') # pagerank: a ranking of the nodes in the graph based on the structure of # the incoming links pagerank = nx.pagerank(G_dir, 
weight='length') stats['pagerank'] = pagerank # node with the highest page rank, and its value pagerank_max_node = max(pagerank, key=lambda x: pagerank[x]) stats['pagerank_max_node'] = pagerank_max_node stats['pagerank_max'] = pagerank[pagerank_max_node] # node with the lowest page rank, and its value pagerank_min_node = min(pagerank, key=lambda x: pagerank[x]) stats['pagerank_min_node'] = pagerank_min_node stats['pagerank_min'] = pagerank[pagerank_min_node] # if True, calculate node and edge connectivity if connectivity: start_time = time.time() # node connectivity is the minimum number of nodes that must be removed # to disconnect G or render it trivial stats['node_connectivity'] = nx.node_connectivity(G_strong) # edge connectivity is equal to the minimum number of edges that must be # removed to disconnect G or render it trivial stats['edge_connectivity'] = nx.edge_connectivity(G_strong) log('Calculated node and edge connectivity in {:,.2f} seconds'.format( time.time() - start_time)) # if True, calculate average node connectivity if anc: # mean number of internally node-disjoint paths between each pair of # nodes in G, i.e., the expected number of nodes that must be removed to # disconnect a randomly selected pair of non-adjacent nodes start_time = time.time() stats['node_connectivity_avg'] = nx.average_node_connectivity(G) log('Calculated average node connectivity in {:,.2f} seconds'.format( time.time() - start_time)) # if True, calculate shortest paths, eccentricity, and topological metrics # that use eccentricity if ecc: # precompute shortest paths between all nodes for eccentricity-based # stats start_time = time.time() sp = { source: dict( nx.single_source_dijkstra_path_length(G_strong, source, weight='length')) for source in G_strong.nodes() } log('Calculated shortest path lengths in {:,.2f} seconds'.format( time.time() - start_time)) # eccentricity of a node v is the maximum distance from v to all other # nodes in G eccentricity = nx.eccentricity(G_strong, sp=sp) stats['eccentricity'] = eccentricity # diameter is the maximum eccentricity diameter = nx.diameter(G_strong, e=eccentricity) stats['diameter'] = diameter # radius is the minimum eccentricity radius = nx.radius(G_strong, e=eccentricity) stats['radius'] = radius # center is the set of nodes with eccentricity equal to radius center = nx.center(G_strong, e=eccentricity) stats['center'] = center # periphery is the set of nodes with eccentricity equal to the diameter periphery = nx.periphery(G_strong, e=eccentricity) stats['periphery'] = periphery # if True, calculate node closeness centrality if cc: # closeness centrality of a node is the reciprocal of the sum of the # shortest path distances from u to all other nodes start_time = time.time() closeness_centrality = nx.closeness_centrality(G, distance='length') stats['closeness_centrality'] = closeness_centrality stats['closeness_centrality_avg'] = sum( closeness_centrality.values()) / len(closeness_centrality) log('Calculated closeness centrality in {:,.2f} seconds'.format( time.time() - start_time)) # if True, calculate node betweenness centrality if bc: # betweenness centrality of a node is the sum of the fraction of # all-pairs shortest paths that pass through node # networkx 2.4+ implementation cannot run on Multi(Di)Graphs, so use DiGraph start_time = time.time() betweenness_centrality = nx.betweenness_centrality(G_dir, weight='length') stats['betweenness_centrality'] = betweenness_centrality stats['betweenness_centrality_avg'] = sum( betweenness_centrality.values()) / 
len(betweenness_centrality) log('Calculated betweenness centrality in {:,.2f} seconds'.format( time.time() - start_time)) log('Calculated extended stats in {:,.2f} seconds'.format(time.time() - full_start_time)) return stats
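# Hedged usage sketch (not part of the module above), assuming it runs in the
# same module so that get_largest_component() and log() are available. The
# random stand-in graph below is arbitrary; any MultiDiGraph whose edges carry
# a 'length' attribute would do.
import networkx as nx

G_demo = nx.scale_free_graph(200, seed=1)       # returns a MultiDiGraph
nx.set_edge_attributes(G_demo, 1.0, 'length')   # dummy edge lengths
stats = extended_stats(G_demo, connectivity=False, anc=False,
                       ecc=False, bc=True, cc=True)
print(stats['betweenness_centrality_avg'], stats['closeness_centrality_avg'])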
def construct_ccig(sentences, concepts, title=None, use_cd=True, betweenness_threshold_coef=1.0, max_c_size=10, min_c_size=3, IDF=None): """ Given a segmented text and a list of concepts, construct concept community interaction graph. :param sentences: a list of sentences. :param concepts: a list of concepts. :return: a concept community interaction graph. """ g = nx.Graph() concepts = list(set(concepts)) concepts = remove_values_from_list(concepts, EMPTY_VERTEX_NAME) if len(sentences) == 0 or len(concepts) == 0: print("No concept in concepts list.") return None if len(concepts) > 70: print("Too many concepts.") return None # get concept communities if use_cd: concept_communities = get_concept_communities(sentences, concepts, betweenness_threshold_coef, max_c_size, min_c_size) else: concept_communities = [[c] for c in concepts] if use_cd: cname_sentidxs = assign_sentences_to_concept_communities( sentences, concept_communities, IDF) else: cname_sentidxs = assign_sentences_to_concepts(sentences, concepts) # initialize vertex properties concept_vertexidxs_map = {} for c in concepts: concept_vertexidxs_map[c] = [] g.add_node(0, name=EMPTY_VERTEX_NAME, concepts=[], sentidxs=cname_sentidxs[EMPTY_VERTEX_NAME]) # g.add_node(0) # g.node[0]['name'] = EMPTY_VERTEX_NAME # g.node[0]['concepts'] = [] # g.node[0]['sentidxs'] = cname_sentidxs[EMPTY_VERTEX_NAME] # print(g.node[0]) i = 1 for community in concept_communities: cname = community2name(community) if len(cname_sentidxs[cname]) == 0: continue g.add_node(i, name=cname, concepts=community, sentidxs=cname_sentidxs[cname]) for concept in community: concept_vertexidxs_map[concept].append(i) i = i + 1 # edges by connective entences # dic eprop_name = {} eprop_concepts = {} eprop_sentidxs = {} eprop_weight_numsent = {} eprop_weight_tfidf = {} for sent_idx in range(len(sentences)): sent = sentences[sent_idx] words = str(sent).split() intersect = set(words).intersection(set(concepts)) if len(intersect) == 0: continue related_vertexidxs = [] for c in intersect: related_vertexidxs.extend(concept_vertexidxs_map[c]) related_vertexidxs = list(set(related_vertexidxs)) # print("related_vertex_idx:") # print(related_vertexidxs) num_related_v = len(related_vertexidxs) if num_related_v < 2: continue for j in range(num_related_v): v1_idx = related_vertexidxs[j] for k in range(j, num_related_v): if j == k: continue v2_idx = related_vertexidxs[k] source_idx = min(v1_idx, v2_idx) target_idx = max(v1_idx, v2_idx) e = (source_idx, target_idx) if not g.has_edge(source_idx, target_idx): # g.add_edge(source_idx, target_idx) eprop_sentidxs[e] = [sent_idx] eprop_concepts[e] = list(intersect) g.add_edge(source_idx, target_idx) # g.add_edges_from([(source_idx, target_idx, dict(sentidxs=eprop_sentidxs[e])), # (source_idx, target_idx, dict(concepts=eprop_concepts[e]))]) else: old_idxs = list(eprop_sentidxs[e]) old_idxs.append(sent_idx) eprop_sentidxs[e] = old_idxs old_concepts = list(eprop_concepts[e]) old_concepts.extend(intersect) eprop_concepts[e] = list(set(old_concepts)) g[source_idx][target_idx]['sentidxs'] = eprop_sentidxs[e] g[source_idx][target_idx]['concepts'] = eprop_concepts[e] # assign vertex names and weights for e in g.edges(): eprop_name[e] = " ".join(eprop_concepts[e]) eprop_weight_numsent[e] = float(len(eprop_sentidxs[e])) eprop_weight_tfidf[e] = 0.0 g[e[0]][e[1]]['weight_numsent'] = eprop_weight_numsent[e] g[e[0]][e[1]]['weight_tfidf'] = eprop_weight_tfidf[e] # edges by node text similarity WEIGHT_THRESHOLD = 0.001 # NOTICE: smaller threshold leads to more edges 
    numv = g.number_of_nodes()
    for i in range(numv):
        for j in range(i, numv):
            if j == i:
                continue
            v1 = g.nodes[i]
            v2 = g.nodes[j]
            idxs1 = list(set(v1['sentidxs']))
            idxs2 = list(set(v2['sentidxs']))
            text1 = " ".join([sentences[s] for s in idxs1])
            text2 = " ".join([sentences[s] for s in idxs2])
            w = tfidf_cos_sim(text1, text2, IDF)
            if w >= WEIGHT_THRESHOLD:
                e = (i, j)
                if not g.has_edge(i, j):
                    eprop_sentidxs[e] = []
                    eprop_concepts[e] = []
                    eprop_weight_numsent[e] = 0.0
                    eprop_name[e] = ""
                    g.add_edges_from([
                        (i, j, dict(sentidxs=eprop_sentidxs[e])),
                        (i, j, dict(concepts=eprop_concepts[e])),
                        (i, j, dict(weight_numsent=eprop_weight_numsent[e])),
                        (i, j, dict(weight_name=eprop_name[e]))
                    ])
                eprop_weight_tfidf[e] = w
                g[i][j]['weight_tfidf'] = eprop_weight_tfidf[e]

    if title is not None:
        # add a single title vertex (add_nodes_from('TITLE') would add one node
        # per character, i.e. 'T', 'I', 'T', 'L', 'E')
        g.add_node('TITLE', name=TITLE_VERTEX_NAME, sentidxs=[], concepts=[])

    # calculate vertex scores
    pr = nx.pagerank(g, weight='weight_tfidf')
    bt = nx.betweenness_centrality(g, weight='weight_tfidf')
    try:
        katz = nx.katz_centrality(g, weight='weight_tfidf')
    except Exception:
        # fall back to zero scores keyed by node, so the lookup below also
        # works for the title vertex
        katz = {v: 0.0 for v in g.nodes()}
    for i in g.nodes():
        g.nodes[i]['pagerank'] = pr[i]
        g.nodes[i]['betweenness'] = bt[i]
        g.nodes[i]['katz'] = katz[i]

    # edge betweenness (nx.edge_betweenness was removed; use the _centrality name)
    ebt = nx.edge_betweenness_centrality(g, weight='weight_tfidf')
    for i in range(len(g.nodes())):
        for j in range(i, len(g.nodes())):
            if j == i:
                continue
            if g.has_edge(i, j):
                g[i][j]['betweenness'] = ebt[(i, j)]
    return g
res = max(res, current) print(res) if __name__ == '__main__': # def main(): # global mymap, masti,n,m,G for i in range(1, n + 1): mymap[i] = 0 getGraph() timepass = defaultdict(lambda: 0) q = deque() #pr=nx.pagerank(G,0.4) if (G.number_of_edges() < 4 * G.number_of_nodes() and G.number_of_nodes() < 800): pr = nx.betweenness_centrality(G) elif (G.number_of_nodes() < 2000 and 4 * G.number_of_nodes() > G.number_of_edges()): pr = nx.betweenness_centrality(G, k=max(1, G.number_of_nodes() // 8)) elif (G.number_of_nodes() < 5000 and 10 * G.number_of_nodes() > G.number_of_edges()): pr = nx.betweenness_centrality(G, k=max(1, G.number_of_nodes() // 32)) elif (G.number_of_nodes() < 20000 and 10 * G.number_of_nodes() > G.number_of_edges()): pr = nx.betweenness_centrality(G, k=max(1, G.number_of_nodes() // 2000)) elif (G.number_of_nodes() < 50000 and 10 * G.number_of_nodes() > G.number_of_edges()): pr = nx.betweenness_centrality(G, k=max(1,