def plot_co_x(cox, start, end, size = (20,20), title = '', weighted=False, weight_threshold=10):
    """
    Plotting function for keyword graphs.

    Node colour follows eigenvector centrality and node size follows degree.

    Parameters
    --------------------
    cox: the coword networkx graph
    start: start year (only used in the figure title)
    end: end year (only used in the figure title)
    size: figure size handed to ``plt.figure(figsize=...)``
    title: text placed in front of the "start - end" part of the title
    weighted: when True, edges are split on their 'weight' attribute and the
        two groups are drawn with different styles; when False the graph is
        drawn with a single ``nx.draw_graphviz`` call
    weight_threshold: edges whose 'weight' is strictly greater than this are
        drawn as "large" edges (only read when ``weighted`` is True)
    """
    plt.figure(figsize=size)
    plt.title(title +' %s - %s'%(start,end), fontsize=18)

    # Colours and sizes are built in one explicit node order, and that same
    # order is handed to the drawing calls, so every value lands on the node
    # it was computed for (two dicts' .values() carry no such guarantee).
    nodes = list(cox.nodes())
    centrality = nx.eigenvector_centrality(cox)
    node_sizes = [cox.degree(n)*6+20 for n in nodes]

    if weighted:
        elarge = []
        esmall = []
        for u, v, d in cox.edges(data=True):
            if d['weight'] > weight_threshold:
                elarge.append((u, v))
            elif d['weight'] <= weight_threshold:
                esmall.append((u, v))

        pos = nx.graphviz_layout(cox)  # positions for all nodes
        nx.draw_networkx_nodes(cox, pos,
                               nodelist=nodes,
                               node_color=[centrality[n]*4500 for n in nodes],
                               node_size=node_sizes,
                               alpha=0.7)
        # edges
        nx.draw_networkx_edges(cox, pos, edgelist=elarge,
                               width=1, alpha=0.5, edge_color='black')
        nx.draw_networkx_edges(cox, pos, edgelist=esmall,
                               width=0.3, alpha=0.5, edge_color='yellow', style='dotted')
        # labels
        nx.draw_networkx_labels(cox, pos, font_size=10, font_family='sans-serif')
        plt.axis('off')
    else:
        # font_size (not fontsize) is the keyword the label drawer understands.
        nx.draw_graphviz(cox, with_labels=True,
                         alpha=0.8, width=0.1,
                         font_size=9,
                         nodelist=nodes,
                         node_color=[centrality[n]*4 for n in nodes],
                         node_size=node_sizes)
def buildGraphFromTwitterFollowing(self):
    """
    Crawl followers breadth-first from the users queued in ``self.userq``,
    add a (user name, follower name) edge to ``self.tng`` for each follower,
    and stop once ``self.no_of_vertices`` exceeds 50.  The resulting graph is
    then drawn and its PageRank and eigenvector centralities are printed.
    """
    while True:
        twitter_id = self.userq.get()
        twitter_id_dict = json.loads(twitter_id.AsJsonString())
        for f in self.api.GetFollowers(twitter_id):
            try:
                follower_id_dict = json.loads(f.AsJsonString())
                self.tng.add_edge(twitter_id_dict["name"], follower_id_dict["name"])
                self.userq.put(f)
                self.no_of_vertices += 1
            except Exception:
                # Best-effort crawl: skip a follower that cannot be processed,
                # but let KeyboardInterrupt / SystemExit stop the crawler.
                continue
        # The limit is checked once per crawled user, after all of that
        # user's followers have been added.
        if self.no_of_vertices > 50:
            break

    rule = "==========================================================================================="
    print("======================================")
    # Use the computed shell layout for drawing instead of discarding it.
    pos = nx.shell_layout(self.tng)
    nx.draw_networkx(self.tng, pos=pos)
    print(rule)
    print("Bonacich Power Centrality of the Social Network (Twitter) Crawled - computed using PageRank")
    print("(a degree centrality based on social prestige)")
    print(rule)
    print(sorted(nx.pagerank(self.tng).items(), key=operator.itemgetter(1), reverse=True))
    print(rule)
    print("Eigen Vector Centrality")
    print(rule)
    print(nx.eigenvector_centrality(self.tng))
    plt.show()
def eigenvector_component(seed_num, graph_json_filename=None, graph_json_str=None):
    """
    Pick ``seed_num`` seed nodes by eigenvector centrality, spread over the
    large connected components of a graph.

    Only components holding more than 10% of the graph's nodes are used.
    Every component but the first gets a share of seeds proportional to its
    size (rounded down); the first component receives whatever is left.

    :param seed_num: total number of seeds requested
    :param graph_json_filename: file to load the graph from (used when
        ``graph_json_str`` is None)
    :param graph_json_str: JSON string to load the graph from
    :return: list of node ids; empty when no graph source is given or when no
        component passes the 10% cut-off
    """
    if graph_json_filename is None and graph_json_str is None:
        return []

    if graph_json_str is None:
        G = util.load_graph(graph_json_filename=graph_json_filename)
    else:
        G = util.load_graph(graph_json_str=graph_json_str)

    min_size = 0.1 * len(G)
    components = [comp for comp in nx.connected_components(G) if len(comp) > min_size]
    if not components:
        # Nothing passed the cut-off (e.g. an empty graph): there is no
        # component to take seeds from.
        return []

    def top_nodes(comp, count):
        # The ``count`` most central nodes of one component.
        scores = nx.eigenvector_centrality(G.subgraph(list(comp)))
        return [node for node, _ in collections.Counter(scores).most_common(count)]

    total_size = sum(len(comp) for comp in components)
    total_nodes = 0
    rtn = []
    for comp in components[1:]:
        num_nodes = int(float(len(comp)) / total_size * seed_num)
        rtn += top_nodes(comp, num_nodes)
        total_nodes += num_nodes

    # The first component absorbs the seeds lost to rounding above.
    rtn += top_nodes(components[0], seed_num - total_nodes)
    return rtn
def eigenvector(g, recalculate=False):
    """
    Performs robustness analysis based on eigenvector centrality on the
    network ``g``, using the sequential (recalculate = True) or the
    simultaneous (recalculate = False) approach.

    NOTE: ``g`` is modified in place - the most central nodes are removed
    from it one by one.  Pass a copy if the graph is still needed.

    Returns a list with the fraction of nodes removed, a list with the
    corresponding relative sizes of the largest component of the network, and
    the overall vulnerability of the network.
    """
    # Newer NetworkX releases report non-convergence with
    # PowerIterationFailedConvergence, which does not derive from
    # NetworkXError; older releases only have NetworkXError.
    convergence_errors = (
        networkx.NetworkXError,
        getattr(networkx, 'PowerIterationFailedConvergence', networkx.NetworkXError),
    )

    m = networkx.eigenvector_centrality(g, max_iter=5000)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    n = len(g.nodes())
    largest_component = max(networkx.connected_components(g), key=len)
    x = [0]
    y = [len(largest_component) * 1. / n]
    r = 0.0
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            try:
                m = networkx.eigenvector_centrality(g, max_iter=5000)
            except convergence_errors:
                # Centrality can no longer be computed: stop the attack here.
                break
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
        largest_component = max(networkx.connected_components(g), key=len)
        x.append(i * 1. / n)
        r += len(largest_component) * 1. / n
        y.append(len(largest_component) * 1. / n)
    return x, y, r / n
def sna_calculations(g, play_file):
    """
    :param g: a NetworkX graph object
    :type g: object
    :param play_file: the location of a play in .txt format
    :type play_file: string
    :return: returns a dictionary containing various network related figures
    :rtype: dict
    :note: also writes into results/file_name-snaCalculations.csv and results/allCharacters.csv
    """
    file_name = os.path.splitext(os.path.basename(play_file))[0]

    def mean_of(centrality):
        # Average of the values of a {node: score} dict.
        return numpy.mean(numpy.fromiter(iter(centrality.values()), dtype=float))

    # Each measure is computed once and reused for both the averages and the
    # per-character table.
    degree = nx.degree_centrality(g)
    degree_values = numpy.fromiter(iter(degree.values()), dtype=float)

    sna_calculations_list = dict()
    # The play type is encoded as the first character of the file name.
    sna_calculations_list['playType'] = file_name[0]
    sna_calculations_list['avDegreeCentrality'] = numpy.mean(degree_values)
    sna_calculations_list['avDegreeCentralityStd'] = numpy.std(degree_values)
    sna_calculations_list['avInDegreeCentrality'] = mean_of(nx.in_degree_centrality(g))
    sna_calculations_list['avOutDegreeCentrality'] = mean_of(nx.out_degree_centrality(g))
    try:
        sna_calculations_list['avShortestPathLength'] = nx.average_shortest_path_length(g)
    except Exception:
        # Kept best-effort: any failure here is reported as 'not connected'.
        sna_calculations_list['avShortestPathLength'] = 'not connected'
    sna_calculations_list['density'] = nx.density(g)

    eigenvector = nx.eigenvector_centrality(g)
    betweenness = nx.betweenness_centrality(g)
    sna_calculations_list['avEigenvectorCentrality'] = mean_of(eigenvector)
    sna_calculations_list['avBetweennessCentrality'] = mean_of(betweenness)
    sna_calculations_list['DegreeCentrality'] = degree
    sna_calculations_list['EigenvectorCentrality'] = eigenvector
    sna_calculations_list['BetweennessCentrality'] = betweenness

    # sna_calculations.txt file
    with open('results/' + file_name + '-snaCalculations.csv', 'wb') as sna_file:
        sna_calc_file = csv.writer(sna_file, quoting=csv.QUOTE_ALL, delimiter=';')
        for key, value in sna_calculations_list.items():
            sna_calc_file.writerow([key, value])

    # all_characters.csv file: write the header only when the file is new
    if not os.path.isfile('results/allCharacters.csv'):
        with open('results/allCharacters.csv', 'w') as f:
            f.write(
                'Name;PlayType;play_file;DegreeCentrality;EigenvectorCentrality;BetweennessCentrality;speech_amount;AverageUtteranceLength\n')

    character_speech_amount = speech_amount(play_file)
    with open('results/allCharacters.csv', 'a') as all_characters:
        for character in degree:
            all_characters.write(';'.join([
                character,
                str(sna_calculations_list['playType']),
                file_name,
                str(degree[character]),
                str(eigenvector[character]),
                str(betweenness[character]),
                str(character_speech_amount[0][character]),
                str(character_speech_amount[1][character]),
            ]) + '\n')

    # Return the result dict (the original returned the function object).
    return sna_calculations_list
def eigenvector_centrality(self, iterations, withme=False, node=None, average=False):
    """
    Print (and partly return) eigenvector centrality for the network.

    :param iterations: ``max_iter`` handed to ``nx.eigenvector_centrality``
    :param withme: use ``self.mynet`` when True, ``self.no_ego_net`` otherwise
    :param node: when None, report on every node; otherwise a node id, or a
        node name that ``self.name_to_id`` can translate
    :param average: with ``node=None``, print only the average instead of
        one line per node
    :return: with ``node=None`` a dict {node id: centrality}.  With a node
        given, None - except for the historical case ``withme=True`` plus a
        node *name*, where the coefficient is returned instead of printed.
    """
    # The centrality is computed once, on the network actually requested.
    network = self.mynet if withme else self.no_ego_net
    my_dict = nx.eigenvector_centrality(network, max_iter = iterations)

    if node is None:
        new = {}
        for i in my_dict:
            new[self.id_to_name(i)] = my_dict[i]
        if average:
            print("The average is " + str(round(sum(new.values())/float(len(new.values())),4)))
        else:
            for i, j in new.items():
                print("%s %s" % (i, round(j, 4)))
        return dict(my_dict)

    try:
        coefficient = my_dict[node]
    except (KeyError, TypeError):
        # Not a node id: fall back to treating ``node`` as a display name.
        try:
            coefficient = my_dict[self.name_to_id(node)]
        except Exception:
            print("Invalid node name")
            return None
        if withme:
            # Kept for existing callers: this path has always returned the
            # value rather than printing it.
            return coefficient
    print("The coefficient for node " + str(node) + " is " + str(round(coefficient, 4)))
def centrality_measures(self):
    """
    Compute betweenness, closeness and right/left eigenvector centrality for
    ``self.G``, store each distribution on ``self`` and summarise it with
    ``self.Stats.get_distribution_info``.

    :return: ``[values, header]`` where ``values`` is the flat list of summary
        statistics and ``header`` is the matching comma-prefixed column text
    """
    centrality_measures = []
    txt = ''

    def add_statistics(distribution):
        # Summarise one distribution and append it as a complete row segment;
        # the segment is assembled first so a failure cannot leave half of it
        # behind in the result.
        stats = self.Stats.get_distribution_info(distribution)
        segment = list(stats[:5]) + list(stats[5])
        centrality_measures.extend(segment)
        return stats

    def add_zeros(reference):
        # Placeholder segment with the same width as ``reference``.
        centrality_measures.extend([0, 0, 0, 0, 0])
        centrality_measures.extend([0] * len(reference[5]))

    # betweenness
    # unweighted
    self.unweighted_betweenness_distribution = nx.betweenness_centrality(self.G)
    statistics = add_statistics(self.unweighted_betweenness_distribution)
    txt += ',average betweenness centrality (unweighted)' + self.standard_text_distribution

    # weighted: stored on the instance but not part of the summary
    self.weighted_betweenness_distribution = nx.betweenness_centrality(self.G, weight = self.weight_id)

    # closeness
    # unweighted
    self.unweighted_closeness_distribution = nx.closeness_centrality(self.G)
    statistics = add_statistics(self.unweighted_closeness_distribution)
    txt += ',average closeness centrality (unweighted)' + self.standard_text_distribution

    # eigen vector
    # right
    try:
        self.right_eigenvector_distribution = nx.eigenvector_centrality(self.G)
        statistics = add_statistics(self.right_eigenvector_distribution)
    except Exception:
        # Best-effort: report zeros when the measure cannot be computed.
        add_zeros(statistics)
    txt += ',average right eigenvector' + self.standard_text_distribution

    # left
    try:
        G_rev = self.G.reverse()
        self.lef_eigenvector_distribution = nx.eigenvector_centrality(G_rev)
        statistics = add_statistics(self.lef_eigenvector_distribution)
    except Exception:
        add_zeros(statistics)
    txt += ',average left eigenvector' + self.standard_text_distribution

    return [centrality_measures, txt]
def eigenvector_apl(g, recalculate=False):
    """
    Performs robustness analysis based on eigenvector centrality on the
    network ``g``, using the sequential (recalculate = True) or the
    simultaneous (recalculate = False) approach, and tracks the average
    shortest path length (averaged over connected components).

    NOTE: ``g`` is modified in place - the most central nodes are removed
    from it one by one.  Pass a copy if the graph is still needed.

    Returns a list with the fraction of nodes removed, a list with the
    corresponding average path lengths relative to the initial one, and the
    overall vulnerability of the network.
    """
    # Newer NetworkX releases report non-convergence with
    # PowerIterationFailedConvergence, which does not derive from
    # NetworkXError; older releases only have NetworkXError.
    convergence_errors = (
        networkx.NetworkXError,
        getattr(networkx, 'PowerIterationFailedConvergence', networkx.NetworkXError),
    )

    def mean_component_apl():
        # Average shortest path length, averaged over all components;
        # single-node components contribute a length of 0.
        total = 0.0
        number_of_components = 0
        for sg in networkx.connected_component_subgraphs(g):
            if len(sg.nodes()) > 1:
                total += networkx.average_shortest_path_length(sg)
            number_of_components += 1
        return total / number_of_components

    m = networkx.eigenvector_centrality(g, max_iter=5000)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []
    n = len(g.nodes())
    initial_apl = mean_component_apl()
    r = 0.0
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            try:
                m = networkx.eigenvector_centrality(g, max_iter=5000)
            except convergence_errors:
                # Centrality can no longer be computed: stop the attack here.
                break
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
        average_path_length = mean_component_apl()
        # x is the fraction of nodes removed, as documented.
        x.append(i * 1. / n)
        r += average_path_length * 1. / initial_apl
        y.append(average_path_length * 1. / initial_apl)
    # NOTE(review): r is already a sum of normalised path lengths; dividing by
    # initial_apl again may have been meant to be a division by n - confirm.
    return x, y, r / initial_apl
def eigValue(charList, graphFile, bookNetworksPath):
    """
    Eigenvector centrality ("Weight"-weighted) of characters.

    With an empty ``charList`` the graph ``bookNetworksPath + "overall.gexf"``
    is read and the names of all its characters are returned, most central
    first.  Otherwise the chapter graph ``graphFile`` is read and a list of
    ``(character, value)`` pairs is returned in the order of ``charList``:
    ``value`` is the unnormalised centrality as a string, or the integer 0
    when the character does not occur in the chapter graph.
    """
    if not charList:
        # No characters requested: rank every character of the whole book.
        overall_g = nx.read_gexf(bookNetworksPath + "overall.gexf")
        overallEigCent = nx.eigenvector_centrality(overall_g, max_iter=100, tol=1.0e-6,
                                                   nstart=None, weight="Weight")
        ranked = sorted(overallEigCent.items(), key=itemgetter(1), reverse=True)
        return [name for name, _ in ranked]

    # Compute eigenvectors for all characters in the current chapter graph.
    g = nx.read_gexf(graphFile)
    eigCentrality = nx.eigenvector_centrality(g, max_iter=100, tol=1.0e-6,
                                              nstart=None, weight="Weight")
    result = []
    for item in charList:
        if item in eigCentrality:
            result.append((item, str(eigCentrality[item])))
        else:
            # If current character is not present in the current chapter
            # assign 0 influence.
            result.append((item, 0))
    return result
def relevant_stats(G):
    """
    Compute closeness, betweenness, Katz and eigenvector centrality of ``G``.

    Closeness and betweenness use the 'distance' edge attribute.

    :return: tuple ``(closeness, betweenness, katz, eigenvector)`` of
        {node: score} dicts (previously the results were computed and
        thrown away)
    """
    cloC = nx.closeness_centrality(G, distance = 'distance')
    betC = nx.betweenness_centrality(G, weight = 'distance')
    katC = nx.katz_centrality(G)
    eigC = nx.eigenvector_centrality(G)
    return cloC, betC, katC, eigC
def set_capacities_eigenvector_gravity(topology, capacities, capacity_unit='Mbps', max_iter=1000):
    """
    Set link capacities proportionally to the product of the eigenvector
    centralities of the two end-points of the link

    Parameters
    ----------
    topology : Topology
        The topology to which link capacities will be set
    capacities : list
        A list of all possible capacity values
    capacity_unit : str, optional
        The unit in which capacity value is expressed (e.g. Mbps, Gbps etc..)
    max_iter : int, optional
        The max number of iteration of the algorithm allowed. If a solution is
        not found within this number of iterations, a RuntimeError is raised.

    Raises
    ------
    RuntimeError : if the algorithm does not converge in max_iter iterations
    """
    # Newer NetworkX releases report non-convergence with
    # PowerIterationFailedConvergence, which does not derive from
    # NetworkXError; older releases only have NetworkXError.
    convergence_errors = (
        nx.NetworkXError,
        getattr(nx, 'PowerIterationFailedConvergence', nx.NetworkXError),
    )
    try:
        centrality = nx.eigenvector_centrality(topology, max_iter=max_iter)
    except convergence_errors:
        raise RuntimeError('Algorithm did not converge in %d iterations' % max_iter)
    _set_capacities_gravity(topology, capacities, centrality, capacity_unit)
def all_users_popular_nodes(self):
    # Loads the 'graph3' graph and the six 'all_posts_s1' .. 'all_posts_s6'
    # graphs, ranks the nodes of one graph by degree, eigenvector and
    # (sampled, k=20) betweenness centrality, and prepares a '|'-delimited
    # CSV writer on an in-memory buffer.
    # NOTE(review): the visible code stops after creating the writer; nothing
    # is written or returned here - the remainder is presumably not shown.
    Gall = self._graph_from_cursor('graph3')  # NOTE(review): not used below
    slots = []
    for i in range(1,7):
        G = self._graph_from_cursor('all_posts_s%d' % i)
        slots.append(G)
    # NOTE(review): the original indentation was lost; this layout assumes the
    # loop ends above, which makes G the last slot graph ('all_posts_s6') from
    # here on - confirm whether Gall or a per-slot computation was intended.
    degree = nx.degree_centrality(G).items()
    eigen = nx.eigenvector_centrality(G).items()
    betweeness = nx.betweenness_centrality(G, k=20).items()
    # Ten highest-scoring (node, score) pairs per measure.
    topDegree = sorted(degree, key=lambda (n,x): x, reverse=True)[:10]
    topEigen = sorted(eigen, key=lambda (n,x): x, reverse=True)[:10]
    topBetweeness = sorted(betweeness, key=lambda (n,x): x, reverse=True)[:10]
    # Keep only the node ids.
    topDegreeIds = map(lambda (n,x): n, topDegree)
    topEigenIds = map(lambda (n,x): n, topEigen)
    topBetweenessIds = map(lambda (n,x): n, topBetweeness)
    # Nodes that are in every top-10 list / in at least one top-10 list.
    inter = list(set(topDegreeIds).intersection(topEigenIds).intersection(topBetweenessIds))
    union = list(set(topDegreeIds).union(topEigenIds).union(topBetweenessIds))
    out = StringIO.StringIO()
    writer = csv.writer(out, delimiter='|', quoting=csv.QUOTE_NONE)
def eigenvector_neighbors(seed_num, graph=None, graph_json_filename=None, graph_json_str=None):
    """
    Pick up to ``seed_num`` seed nodes among the neighbours of the
    ``SURROUND_TOP`` most central nodes.

    The top nodes are visited round-robin; each visit adds one randomly
    chosen neighbour that is neither a top node nor already selected.
    Centrality is read from the "centrality" node attribute, or computed with
    eigenvector centrality and stored there when the attribute is absent.

    :param seed_num: number of seeds requested
    :param graph: an already loaded graph (takes precedence)
    :param graph_json_filename: file to load the graph from
    :param graph_json_str: JSON string to load the graph from
    :return: list of node ids; shorter than ``seed_num`` when the top nodes
        run out of eligible neighbours, empty when no graph source is given
    """
    if graph_json_filename is None and graph_json_str is None and graph is None:
        return []

    if graph is not None:
        G = graph
    elif graph_json_str is None:
        G = util.load_graph(graph_json_filename=graph_json_filename)
    else:
        G = util.load_graph(graph_json_str=graph_json_str)

    clse_cent = nx.get_node_attributes(G, "centrality")
    if len(clse_cent) == 0:
        clse_cent = nx.eigenvector_centrality(G)
        nx.set_node_attributes(G, "centrality", clse_cent)

    nodes = [node for node, _ in collections.Counter(clse_cent).most_common(SURROUND_TOP)]
    top = set(nodes)

    rtn = []
    cursor = 0
    misses = 0  # consecutive top nodes without an eligible neighbour
    # Stop once enough seeds are found or a full round over the top nodes
    # produced nothing (this also covers an empty list of top nodes).
    while len(rtn) < seed_num and misses < len(nodes):
        current_node = nodes[cursor % len(nodes)]
        cursor += 1
        candidates = set(G.neighbors(current_node)) - set(rtn) - top
        if not candidates:
            misses += 1
            continue
        misses = 0
        rtn += random.sample(list(candidates), 1)
    return rtn
def centralities(self):
    '''
    Get info on centralities of data

    Params:
        None

    Returns:
        dictionary of centrality metrics with keys(centralities supported):
            degree - degree centrality
            betweeness - betweeness centrality
            eigenvector - eigenvector centrality
            katz - katz centrality

        'eigenvector' and 'katz' hold the string 'empty or exception' when
        the respective measure could not be computed.  The two are handled
        independently, so a failure of one no longer discards the other.
        Hub, authority and pagerank scores are not implemented.
    '''
    output = {}
    output['degree'] = nx.degree_centrality(self.G)
    output['betweeness'] = nx.betweenness_centrality(self.G)
    for key, function_name in (('eigenvector', 'eigenvector_centrality'),
                               ('katz', 'katz_centrality')):
        try:
            # Looked up inside the try so a missing function is also reported
            # as 'empty or exception'.
            output[key] = getattr(nx, function_name)(self.G)
        except Exception:
            output[key] = 'empty or exception'
    return output
def main(self):
    """
    Build the retweet diffusion graph for ``TWEETID``, write one CSV row of
    centrality figures per user (sorted by degree, highest first) and save
    and show a drawing of the graph.
    """
    G = nx.Graph()
    retweet = self.gettweet(TWEETID)
    # Special handling for the author of the flamed tweet only.
    retweet['jyunichidesita'] = (datetime.datetime(2014, 4, 28, 0, 0, 0),)
    # A real list: it is indexed and searched below.
    userlist = list(retweet.keys())

    # Fetch the first LIMIT users who retweeted the flamed tweet.
    node = self.getnode(TWEETID, LIMIT)
    while node:
        next_nodes = []
        for n in node:
            # Users who retweeted and are linked to user n.
            tmp = self.getRTchannel(n, userlist, retweet)
            next_nodes.extend(tmp)
            edges = [(userlist.index(n), userlist.index(x)) for x in tmp]
            G.add_edges_from(edges)
        node = next_nodes

    degree = dict(nx.degree(G))
    close = nx.closeness_centrality(G)
    bet = nx.betweenness_centrality(G)
    eigen = nx.eigenvector_centrality(G)

    # The file is closed even when writing a row fails.
    with open('../../data/output/diffusion/centrality_all.csv', 'w') as f:
        for k, v in sorted(degree.items(), key=lambda x: x[1], reverse=True):
            user = userlist[k]
            f.write(','.join([
                str(user),
                str(retweet[user][0]),
                str(retweet[user][3]),
                str(v),
                str(close[k]),
                str(bet[k]),
                str(eigen[k]),
            ]) + '\n')

    nx.draw(G, node_size=50)
    plt.savefig("../../data/output/diffusion/undirected_all.png")
    plt.show()
def distinct(g):
    """
    Return ``maximum(...)`` of the betweenness, closeness, degree and
    eigenvector centrality of ``g``, in that order.
    """
    measures = (
        nx.betweenness_centrality,
        nx.closeness_centrality,
        nx.degree_centrality,
        nx.eigenvector_centrality,
    )
    # All four measures are computed before any of them is reduced.
    scores = [measure(g) for measure in measures]
    return [maximum(score) for score in scores]
def describe(G, ny_tri, chems):
    '''
    Describe the network: degrees, clustering, and centrality measures

    Saves the degree distribution plot to output/degree_distribution.png,
    stores the merged per-facility table in the module-level
    ``describeNetwork`` and writes it to output/describeNetwork.csv.

    G: the network; its nodes are matched against the 'Facility' column
    ny_tri: DataFrame providing 'tri_facility_id', 'facility_name',
        'primary_naics' and 'parent_company_name'
    chems: DataFrame with a 'Facility' column, left-joined onto the result
    '''
    global describeNetwork
    from collections import Counter

    def ranked_frame(scores, column):
        # {node: score} -> DataFrame sorted by score, highest first.
        frame = pd.DataFrame(list(scores.items()), columns=['Facility', column])
        return frame.sort_values(column, ascending=False)

    # Degree
    # The number of connections a node has to other nodes.
    degrees = nx.degree(G)
    degrees_df = pd.DataFrame(list(degrees.items()), columns=['Facility', 'Degrees'])
    degree_counts = Counter(degrees.values())
    values = sorted(degree_counts)
    hist = [degree_counts[x] for x in values]
    plt.figure()
    plt.plot(values, hist,'ro-') # degree
    plt.xlabel('Degree')
    plt.ylabel('Number of nodes')
    plt.title('Degree Distribution')
    plt.savefig('output/degree_distribution.png')

    # Clustering coefficients
    # The clustering coefficient is a measure of local density of connections.
    clust_coefficients_df = ranked_frame(nx.clustering(G), 'Clustering Coefficient')

    # Node centrality measures are taken on the largest connected component
    # (the first component returned is not guaranteed to be the largest).
    FCG = max(nx.connected_component_subgraphs(G, copy=True), key=len)

    # Current flow betweenness centrality
    # Current-flow betweenness centrality uses an electrical current model for information spreading
    # in contrast to betweenness centrality which uses shortest paths.
    betweeness_df = ranked_frame(nx.current_flow_betweenness_centrality(FCG), 'Betweeness')

    # Closeness centrality
    # The closeness of a node is the distance to all other nodes in the graph
    # or in the case that the graph is not connected to all other nodes in the connected component containing that node.
    closeness_df = ranked_frame(nx.closeness_centrality(FCG), 'Closeness')

    # Eigenvector centrality
    # Eigenvector centrality computes the centrality for a node based on the centrality of its neighbors.
    # In other words, how connected a node is to other highly connected nodes.
    eigenvector_df = ranked_frame(nx.eigenvector_centrality(FCG), 'Eigenvector')

    # Create dataframe of facility info
    fac_info = ny_tri[['tri_facility_id','facility_name', 'primary_naics', 'parent_company_name']].drop_duplicates()
    fac_info = fac_info.rename(columns={'facility_name':'Facility'})

    # Merge everything; the inner joins keep only facilities present in the
    # component the centralities were computed on.
    describeNetwork = degrees_df.merge(
        clust_coefficients_df, on='Facility').merge(
        betweeness_df, on='Facility').merge(
        closeness_df, on='Facility').merge(
        eigenvector_df, on='Facility').merge(
        fac_info, on='Facility', how='left').merge(
        chems, on='Facility', how='left')
    describeNetwork = describeNetwork.sort_values('Degrees', ascending=False)
    describeNetwork.to_csv('output/describeNetwork.csv')
def create_authors(corpus):
    """
    Generate the authors_profile.tsv file

    To perform just ONE time

    Every tweet of the corpus first updates (and saves) the profile of its
    user; afterwards each author gets an eigenvector centrality from the user
    graph (0. when the author is not in it), a predicted profile, and is
    saved again.

    :type corpus: pandas.DataFrame
    :return:
    """
    tpc = TopicsClassifier(pd_corpus=corpus)
    pp = PredictionProfile(pd_corpus=corpus)

    for _, row in corpus.iterrows():
        user = User(row.User_Name)
        user.load()
        user.update_profile(row.Vector, predict=False)
        user.save()

    scores = nx.eigenvector_centrality(User.load_graph())

    for author in User.get_all_authors():
        author.centrality = scores.get(author.id, 0.)
        author.set_prediction_profile(pp)
        author.set_topic_classifier(tpc)
        author.predict_profile()
        author.save()
    return
def most_central(self,F=1,cent_type='betweenness'):
    """
    Rank the nodes of ``self.G`` by a centrality measure and build a copy of
    the graph that keeps only the nodes at or above ``F`` times the mean
    centrality.

    :param F: factor applied to the mean centrality to get the cut-off
    :param cent_type: one of 'betweenness', 'closeness', 'eigenvector',
        'harmonic', 'katz', 'load' or 'degree'
    :return: tuple ``(cent_dict, thresh, g)``: centrality keyed by node
        label, the cut-off value, and the reduced ``Graph``
    :raises ValueError: if ``cent_type`` is not one of the supported names
    """
    supported = ('betweenness', 'closeness', 'eigenvector', 'harmonic',
                 'katz', 'load', 'degree')
    if cent_type not in supported:
        raise ValueError('unknown cent_type %r; expected one of %s'
                         % (cent_type, ', '.join(supported)))

    # Every supported measure is exposed as nx.<cent_type>_centrality.
    measure = getattr(nx, cent_type + '_centrality')
    ranking = list(measure(self.G).items())

    ranks = [r for n, r in ranking]
    cent_dict = dict((self.lab[n], r) for n, r in ranking)
    m_centrality = sum(ranks)
    if len(ranks) > 0:
        m_centrality = m_centrality/len(ranks)

    # Create a graph with the nodes above the cutoff centrality - remove the
    # low centrality nodes
    thresh = F*m_centrality
    g = Graph(self.adj.copy(),self.char_list)
    for n, r in ranking:
        if r < thresh:
            g.G.remove_node(n)
            del g.lab[n]
    return (cent_dict,thresh,g)
def main():
    """
    Load the anonymised edge list 'AnOutput.txt', build the graphs and print
    the node with the highest PageRank and the average clustering
    coefficient.  In-degree and eigenvector centrality rankings are computed
    as well but not printed.
    """
    # Earlier pipeline steps (node mapper file, name anonymisation) and the
    # snap-based triad / diameter / clustering experiments are disabled.
    an = get_node_list('AnOutput.txt')
    G = create_graph('AnOutput.txt', True)
    G1 = create_graph_for_snap(an, 'AnOutput.txt')

    import operator
    by_score = operator.itemgetter(1)

    pagerank = nx.pagerank(G)
    max_pagerank = key_with_max_val(pagerank)
    a = sorted(pagerank.items(), key=by_score, reverse=True)
    print(max_pagerank)

    centrality = nx.in_degree_centrality(G)
    a = sorted(centrality.items(), key=by_score, reverse=True)

    eigen_vector_centrality = nx.eigenvector_centrality(G)
    a = sorted(eigen_vector_centrality.items(), key=by_score, reverse=True)

    avg_local_clustering_coeff = nx.average_clustering(G)
    print(avg_local_clustering_coeff)
def test_K5(self):
    """Eigenvector centrality: K5"""
    G = nx.complete_graph(5)
    # Every node of a complete graph has the same score.
    expected = math.sqrt(1 / 5.0)

    def check(scores, **tolerance):
        for n in sorted(G):
            assert_almost_equal(scores[n], expected, **tolerance)

    check(nx.eigenvector_centrality(G))
    # Same result with an explicit uniform starting vector.
    check(nx.eigenvector_centrality(G, nstart=dict((n, 1) for n in G)))
    # The numpy implementation is only checked to three places.
    check(nx.eigenvector_centrality_numpy(G), places=3)
def eigenvectorcentralitynx(mutualinformation,startingvector):
    """
    Weighted eigenvector centrality of the graph built from
    ``mutualinformation``.

    Per the original note this is identical to eigenvectorcentralitynx0 but
    takes the additional argument ``startingvector``: an initial guess for
    the eigenvector centrality of all nodes, given as a python dictionary
    (key = node, value = eigenvector centrality estimate).
    """
    return nx.eigenvector_centrality(
        nx.Graph(mutualinformation),
        weight='weight',
        max_iter=2000,
        nstart=startingvector,
    )
def node_eigenvector_centrality(X):
    """
    based on networkx function: eigenvector_centrality

    Each row of ``X`` is reshaped into a square matrix, min-max scaled,
    inverted (1 - value), thinned with ``percentage_removed(..., 0.78)`` and
    turned into a graph whose eigenvector centralities form one output row.
    The output is standardised per row with ``StandardScaler`` before it is
    returned.

    :param X: 2-D array; the row length must be a perfect square
    :return: array with one row per input row and one column per node
    """
    # Array dimensions must be integers; np.sqrt returns a float.
    side = int(round(np.sqrt(X.shape[1])))
    XX = np.zeros((X.shape[0], side))
    for i, value in enumerate(X):
        adj_mat = value.reshape((side, -1))
        adj_mat = (adj_mat - np.min(adj_mat)) / (np.max(adj_mat) - np.min(adj_mat))
        adj_mat = 1 - adj_mat

        percent, th, adj_mat, triu = percentage_removed(adj_mat, 0.78)
        print("percent = {0}, threshold position = {1}, threshold = {2}\n".format(percent, th, triu[th]))

        g = nx.from_numpy_matrix(adj_mat)
        n_nodes = g.number_of_nodes()
        n_edges = g.number_of_edges()
        print("Graph Nodes = {0}, Graph Edges = {1} ".format(n_nodes, n_edges))
        print("\nEdge kept ratio, {0}".format(float(n_edges)/((n_nodes*(n_nodes-1))/2.0)))

        centrality = nx.eigenvector_centrality(g, max_iter=10000)
        node_cent = np.zeros(n_nodes)
        for k in centrality:
            node_cent[k] = centrality[k]
        XX[i] = node_cent
        print("graph {0} => mean {1}, min {2}, max {3}".format(i, np.mean(XX[i]), np.min(XX[i]), np.max(XX[i])))

    # Standardise each row (the scaler works per column, hence the transposes).
    ss = StandardScaler()
    XX = ss.fit_transform(XX.T).T
    return XX
def _calc_centrality_totals(graph):
    """
    Calculates the eigenvector centrality for every node in a graph, then
    assigns those centralities to different demographic groups.

    Each node must carry the attributes 'gender', 'area_of_study' and
    'extra_curricular' (an iterable of activity names).

    @param graph: the graph to calculate centrality for
    @return a dict mapping gender string to list of centralities
    @return a dict mapping major name to list of centralities
    @return a dict mapping activity name to list of centralities
    """
    eigen_centralities = nx.eigenvector_centrality(graph)
    gender_eigen_totals = {}
    major_eigen_totals = {}
    ec_eigen_totals = {}

    for node, attributes in graph.nodes(data=True):
        centrality = eigen_centralities[node]
        # setdefault creates the list on first sight AND records the value;
        # previously the first node of every group was silently dropped.
        gender_eigen_totals.setdefault(attributes['gender'], []).append(centrality)
        major_eigen_totals.setdefault(attributes['area_of_study'], []).append(centrality)
        for ec in attributes['extra_curricular']:
            ec_eigen_totals.setdefault(ec, []).append(centrality)

    return gender_eigen_totals, major_eigen_totals, ec_eigen_totals
def attack_based_max_eigenvector(G):
    """
    Recalculate eigenvector centrality attack

    In each of ``n`` steps (n = number of nodes) the node with the highest
    eigenvector centrality is isolated by removing all of its edges from
    ``G`` (in place), and the number of driver nodes is recorded.

    Returns ``(tot_ND, tot_T)``: driver-node counts and step numbers, both of
    length n + 1 with index 0 describing the untouched graph.
    """
    steps = G.number_of_nodes()
    driver_counts = [0] * (steps + 1)
    step_numbers = [0] * (steps + 1)

    driver_counts[0], _ = ECT.get_number_of_driver_nodes(G)
    step_numbers[0] = 0

    for step in range(1, steps + 1):
        # calculate all nodes' eigenvector centrality and take the maximum
        scores = nx.eigenvector_centrality(G, max_iter=1000, weight=None)
        target = max(scores, key=scores.get)

        # remove all the edges adjacent to the target node
        if nx.is_directed(G):
            for successor in [v for _, v in G.out_edges_iter(target)]:
                G.remove_edge(target, successor)
            for predecessor in [u for u, _ in G.in_edges_iter(target)]:
                G.remove_edge(predecessor, target)
        else:
            for neighbour in list(G[target].keys()):
                G.remove_edge(target, neighbour)

        driver_counts[step], _ = ECT.get_number_of_driver_nodes(G)
        step_numbers[step] = step

    return (driver_counts, step_numbers)
def print_most_often_optimal(self, bests):
    '''
    Print those cities which are most often in optimal layouts.

    Every city that occurs in ``bests`` (a list of lists of layouts, each
    layout being a collection of cities) is printed with its number of
    occurrences, least frequent first.  Then the Pearson correlation between
    occurrence counts and eigenvector centrality is printed, once for all
    cities and once leaving out "Atlanta".
    '''
    from collections import Counter

    print("The centrality measure previously discussed is a good judge of " +
          "how good building a research center in a city is, but let's check our " +
          "work by counting how many times each city appears in the optimal " +
          "placements.")

    # One pass over the layouts instead of a full list scan per lookup.
    counts = Counter(item
                     for sublist in bests
                     for subsublist in sublist
                     for item in subsublist)
    cities = list(self)
    for city in sorted(cities, key=lambda c: counts[c]):
        if counts[city]:
            print(city, counts[city])

    centrality = nx.eigenvector_centrality(self, max_iter=1000)

    def report(label, selected):
        # Print the correlation between occurrences and centrality.
        (corr, pvalue) = stats.pearsonr([counts[city] for city in selected],
                                        [centrality[city] for city in selected])
        print (label + str((corr, pvalue)))

    report("The correlation is ", cities)
    report("The correlation without Atlanta is ",
           [city for city in cities if city != "Atlanta"])
def _graph_centrality_measures(self, df_totals):
    '''
    INPUT: DataFrame
    OUTPUT: dict, dict, dict

    Builds a graph from the rows whose 'participantID' is greater than
    'participantID.B' (edge attribute 'cnt') and returns, for every
    participant, the degree centrality, the Eigenvector centrality, and the
    Eigenvector centrality weighted by 'cnt'.
    '''
    pairs = df_totals.copy()
    # Keep each pair of participants in one direction only.
    one_direction = pairs['participantID'] > pairs['participantID.B']
    G = from_pandas_dataframe(pairs[one_direction], 'participantID', 'participantID.B', 'cnt')
    return (
        nx.degree_centrality(G),
        nx.eigenvector_centrality(G),
        nx.eigenvector_centrality(G, weight='cnt'),
    )
def __init__(self, time, voteomat):
    """Open the statistics file for one run and record the initial metrics.

    The output directory name is assembled from the voteomat's network and
    distribution function names, its parameter values and ``time``; the
    directory is created through ``self.make_sure_path_exists`` and
    ``statistic.csv`` inside it is opened for writing (the handle is kept
    in ``self.file`` and is not closed here).

    ``self.statistic`` is then filled with the voteomat settings, the
    values returned by ``voteomat.get_statistic()`` and the "orientation"
    attribute of the nodes that score highest/lowest on several centrality
    measures.
    """
    self.foldername = voteomat.network_func_name + voteomat.distribution_func_name
    self.foldertime = time
    # Directory name encodes every simulation parameter plus the timestamp.
    self.path = "Statistics//"+self.foldername+"//"
    self.path += g_candidates_affecting_nodes + "=" + str(voteomat.candidates_affecting) + "_"
    self.path += g_candidates_affected_by_median + "=" + str(voteomat.candidates_affected) + "_"
    self.path += g_neighbours_affecting_each_other + "=" + str(voteomat.affecting_neighbours) + "_"
    self.path += g_counterforce_affecting_candidates + "=" + str(voteomat.counter_force_affecting) + "_"
    self.path += "counterforce_left="+str(voteomat.counter_force_left)+"_"+"counterforce_right="+str(voteomat.counter_force_right)+ "_" + time
    self.make_sure_path_exists(self.path)
    self.file = open(self.path + "//statistic.csv", 'w')
    self.statistic = {}
    self.statistic["networkfunc"] = voteomat.network_func_name
    self.statistic["distributionfunc"] = voteomat.distribution_func_name
    self.statistic["acceptance"] = voteomat.acceptance
    # Each of median/avg/std starts as a one-element list.
    median, avg, std = voteomat.get_statistic()
    self.statistic["median"] = []
    self.statistic["median"].append(median)
    self.statistic["avg"] = []
    self.statistic["avg"].append(avg)
    self.statistic["std"] = []
    self.statistic["std"].append(std)
    # NOTE(review): nodes(data=True)[node][1] indexes the node list by
    # position, which only selects the right entry if node labels coincide
    # with list positions (and needs the list-returning NetworkX 1.x API)
    # -- TODO confirm.
    self.statistic["node_with_highest_degree_centrality"] = []
    self.max_degree_node = max( nx.degree_centrality(voteomat.get_network()).items(),key = lambda x: x[1])[0]
    self.statistic["node_with_highest_degree_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_degree_node][1]["orientation"])
    self.statistic["node_with_minimum_degree_centrality"] = []
    self.min_degree_node = min(nx.degree_centrality(voteomat.get_network()).items(), key = lambda x: x[1])[0]
    self.statistic["node_with_minimum_degree_centrality"].append(voteomat.get_network().nodes(data = True)[self.min_degree_node][1]["orientation"])
    self.statistic["node_with_highest_closeness_centrality"] = []
    self.max_closeness_node = max( nx.closeness_centrality(voteomat.get_network()).items(),key = lambda x: x[1])[0]
    self.statistic["node_with_highest_closeness_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_closeness_node][1]["orientation"])
    self.statistic["node_with_highest_betweenness_centrality"] = []
    self.max_betweenness_node = max(nx.betweenness_centrality(voteomat.get_network()).items() ,key = lambda x: x[1])[0]
    self.statistic["node_with_highest_betweenness_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_betweenness_node][1]["orientation"])
    # A NetworkXError from the eigenvector computation is reported and
    # skipped; the (possibly empty) list stays in place.
    try:
        self.statistic["node_with_highest_eigenvector_centrality"] = []
        self.max_eigenvector_node = max( nx.eigenvector_centrality(voteomat.get_network(), max_iter = 1000).items(),key = lambda x: x[1])[0]
        self.statistic["node_with_highest_eigenvector_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_eigenvector_node][1]["orientation"])
    except nx.NetworkXError:
        print "Eigenvector centrality not possible."
    # freeman_centrality receives all degree-centrality values and their maximum.
    freeman = self.freeman_centrality([x[1] for x in nx.degree_centrality(voteomat.get_network()).items()], max( nx.degree_centrality(voteomat.get_network()).items(),key = lambda x: x[1])[1])
    self.statistic["freeman_centrality"] = round(freeman,2)
    self.statistic["affecting_neighbours"] = voteomat.affecting_neighbours
    self.statistic["affecting_candidates"] = voteomat.candidates_affecting
    # NOTE(review): the key below is spelled "canddiates"; it is a runtime
    # key, so any consumer must use the same spelling.
    self.statistic["affected_canddiates"] = voteomat.candidates_affected
    self.statistic["affecting_counter_force"] = voteomat.counter_force_affecting
    self.statistic["affecting_counter_force_left"] = voteomat.counter_force_left
    self.statistic["affecting_counter_force_right"] = voteomat.counter_force_right
    self.statistic["candidates"] = []
    for candidate in voteomat.candidates:
        self.statistic["candidates"].append(candidate.to_save())
    self.statistic["network"] = voteomat.get_network().nodes(data=True);
def main():
    """Build a word-adjacency graph from the corpus and rank its terms.

    Every file returned by ``grab_files`` for the ``tempo-txt`` folder
    (next to the current working directory) is read line by line, split
    into sentences and lower-cased tokens.  Consecutive tokens of a
    sentence are linked by an edge; the final token of each sentence is
    neither linked nor counted as a term.  At the end the number of files
    and distinct terms is printed, followed by the terms with their
    eigenvector centrality.
    """
    G = nx.Graph()
    # os.path.join keeps the path valid on every platform; the previous
    # hard-coded '\\' separator only worked on Windows.
    folderKorpus = os.path.join(os.path.abspath('.'), 'tempo-txt')

    ctrBerkas = 0
    sentTokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    daftarBerkas = grab_files(folderKorpus)
    # A set gives O(1) membership tests; only its size is reported.
    terms = set()
    # Pre-bound so the final pprint cannot fail on an empty corpus.
    tokens = []

    for berkas in daftarBerkas:
        ctrBerkas += 1
        # The context manager closes the file even if tokenizing fails.
        with open(berkas) as data:
            for baris in data:
                for sent in sentTokenizer.tokenize(baris):
                    kalimat = sent.replace('\n', ' ').strip()
                    if not kalimat:
                        continue
                    tokens = nltk.word_tokenize(kalimat.lower())
                    if not tokens:
                        continue
                    terms.add(tokens[0])
                    # Stops one short of the end: the last token (normally
                    # the sentence-final punctuation) is left out.
                    for idx in range(1, len(tokens) - 1):
                        G.add_edge(tokens[idx - 1], tokens[idx])
                        terms.add(tokens[idx])

    # Debug output: tokens of the last sentence that was processed.
    pprint.pprint(tokens)

    print("%d berkas diolah" % ctrBerkas)
    print("%d term diolah" % len(terms))
    ce = nx.eigenvector_centrality(G)
    print(sorted(['%0.2f %s' % (ce[node], node) for node in ce]))
def eigenvector_centrality_ranking(self):
    """Return the ranking built from the eigenvector centrality of self.graph.

    If NetworkX cannot compute the centrality, 'Eigenvector error' is
    printed and the ranking is created from an empty result.
    """
    try:
        results = nx.eigenvector_centrality(self.graph)
    except nx.NetworkXException:
        # NetworkXException is the common base class: it also covers
        # PowerIterationFailedConvergence (no convergence) and
        # NetworkXPointlessConcept (empty graph), neither of which derives
        # from NetworkXError in NetworkX 2+.
        print('Eigenvector error')
        results = {}
    return self.create_ranking(results)
def test_multigraph(self):
    """eigenvector_centrality must raise a NetworkXException for a MultiGraph."""
    multigraph = nx.MultiGraph()
    with pytest.raises(nx.NetworkXException):
        nx.eigenvector_centrality(multigraph)
def createNetwork():
    """Build, draw and analyse the retweet network stored in ../dataset/.

    Four JSON files are loaded, a directed graph is filled through
    ``updateGraph`` from the retweeters that are themselves known users,
    the graph is drawn (type-1 users red, all others blue) and four
    centrality rankings plus an in-/out-degree scatter plot are produced.
    """
    d = dirname(dirname(abspath(__file__))) + '/dataset/'

    def load_json(filename):
        # Read one JSON document from the dataset folder.
        with open(d + filename, 'r') as handle:
            return json.load(handle)

    status_user_dict = load_json('status_user_dict.json')
    user_retweeter_dict = load_json('user_retweeter_dict.json')
    user_id_dict = load_json('user_id_dict.json')
    user_type_dict = load_json('user_type_dict.json')

    # name -> numeric id, then extended in place with id -> name.
    user_id_dict = {k: int(v) for k, v in user_id_dict.items()}
    user_id_dict.update(
        {uid: name for name, uid in list(user_id_dict.items())})

    userset = set(status_user_dict.values())
    interuser_retweeterdict = defaultdict(
        list, {
            int(u): set([e for e in li if e in userset])
            for u, li in user_retweeter_dict.items()
        })

    G = nx.DiGraph()
    for sourceuser, retuserlist in interuser_retweeterdict.items():
        updateGraph(G, sourceuser, retuserlist)

    nodelist1 = user_type_dict["1"]
    nodelist2 = user_type_dict["2"]

    plt.figure()
    pos = nx.spring_layout(G)
    type1_nodes = [e for e in G.nodes() if e in nodelist1]
    other_nodes = [e for e in G.nodes() if e not in nodelist1]
    for members, colour in ((type1_nodes, 'red'), (other_nodes, 'blue')):
        nx.draw_networkx_nodes(G,
                               pos,
                               nodelist=members,
                               node_size=100,
                               cmap=plt.get_cmap('jet'),
                               node_color=colour)
    nx.draw_networkx_edges(G, pos, edge_color='k', arrows=False)
    plt.show()

    def ranked(scores):
        # (user name, score) pairs, highest score first.
        pairs = [(user_id_dict[node], scores[node]) for node in scores]
        return sorted(pairs, key=lambda x: x[1], reverse=True)

    # calculate centrality
    centrality_eigen = nx.eigenvector_centrality(G)
    print(ranked(centrality_eigen))
    centrality_degree = nx.degree_centrality(G)
    print(ranked(centrality_degree))
    centrality_indegree = nx.in_degree_centrality(G)
    print(ranked(centrality_indegree))
    centrality_outdegree = nx.out_degree_centrality(G)
    print(ranked(centrality_outdegree))

    indegree_outdegree_node_tuple = [
        (centrality_indegree[k], centrality_outdegree[k], user_id_dict[k])
        for k in G.nodes()
    ]
    print(sorted(indegree_outdegree_node_tuple, reverse=True))

    plt.figure(2)
    in_values = [e[0] for e in indegree_outdegree_node_tuple]
    out_values = [e[1] for e in indegree_outdegree_node_tuple]
    plt.scatter(in_values, out_values)
    plt.show()
def test_empty(self):
    """eigenvector_centrality must raise a NetworkXException for an empty graph."""
    empty_graph = nx.Graph()
    with pytest.raises(nx.NetworkXException):
        nx.eigenvector_centrality(empty_graph)
facebook_net = build_facebook_net()

# Each ranking below prints the ten best-scoring nodes.  The slice is [:10];
# the previous [0:9] only returned nine entries.

#Degree centrality top 10
# dict() accepts both the plain dict of NetworkX 1.x and the DegreeView of
# NetworkX 2+, which has no .items().
deg = dict(nx.degree(facebook_net))
deg_sorted = sorted(deg.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 degree centrality (node, centrality): ", deg_sorted[:10])

#Closeness centrality top 10
clo = nx.closeness_centrality(facebook_net)
clo_sorted = sorted(clo.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 closeness centrality (node, centrality): ", clo_sorted[:10])

#Betweenness centrality top 10
bet = nx.betweenness_centrality(facebook_net)
bet_sorted = sorted(bet.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 betweenness centrality (node, centrality): ", bet_sorted[:10])

#Eigenvector centrality top 10
eig = nx.eigenvector_centrality(facebook_net)
eig_sorted = sorted(eig.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 eigenvector centrality (node, centrality): ", eig_sorted[:10])

#Pagerank centrality top 10
pag = nx.pagerank(facebook_net)
pag_sorted = sorted(pag.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 pagerank centrality (node, centrality): ", pag_sorted[:10])

#Trim network to only show nodes with more than 1 connection
facebook_net_trimmed = facebook_net.copy()
# Iterate over a snapshot: removing nodes while iterating the live node
# view raises RuntimeError.  Degrees are those of the untrimmed network.
for n in list(facebook_net_trimmed.nodes()):
    if deg[n] < 2:
        facebook_net_trimmed.remove_node(n)

#View all cliques
cliques = list(nx.find_cliques(facebook_net_trimmed))
str(userToID[int(ed[0])]) + ',' + str(userToID[int(ed[1])]) for ed in edges if int(ed[0]) in userToID and int(ed[1]) in userToID ] g = nx.parse_edgelist(edgeList, delimiter=',', create_using=nx.DiGraph(), nodetype=int) missing_nodes = [int(uid) for uid in IDtoUser if int(uid) not in g.nodes()] g.add_nodes_from(missing_nodes) adjMat = nx.adjacency_matrix(g) degCent = nx.in_degree_centrality(g) degCent = [degCent[int(uid)] for uid in IDtoUser] eigenCent = nx.eigenvector_centrality(g) eigenCent = [eigenCent[int(uid)] for uid in IDtoUser] # simMat = [ float(np.dot(R[int(i),:],R[int(j),:])) / (1+math.sqrt(np.sum(R[int(i),:][np.where(R[int(j),:]>0)[0]]**2)*np.sum(R[int(j),:][np.where(R[int(i),:]>0)[0]]**2))) for uid in IDtoUser.keys() for i,j in zip( [uid]*len(IDtoUser.keys()), IDtoUser.keys() ) ] # print(len(simMat)) # simMat = np.reshape(simMat, (len(IDtoUser.keys()),len(IDtoUser.keys()))) # print(simMat.shape) # with h5py.File('sim_pairs.h5', 'w') as hf: # hf.create_dataset('S', data=simMat) simMat = None with h5py.File('sim_pairs.h5', 'r') as hf: simMat = hf['S'][:]
def get_graph_metrics(connectivity_vector, n_nodes=90):
    """Compute 21 summary graph metrics for one connectivity matrix.

    Parameters
    ----------
    connectivity_vector : array-like with ``n_nodes * n_nodes`` entries;
        it is reshaped to a square matrix and is not modified.
    n_nodes : number of nodes (rows/columns) of the matrix.  Defaults to
        90, the size that used to be hard-coded.

    Returns
    -------
    numpy array of length 21, in this order: mean and std of the row sums,
    mean and std of the strictly-lower-triangle weights, average shortest
    path length, degree assortativity, average clustering, transitivity,
    mean local and global efficiency, mean clustering, mean betweenness,
    mean current-flow betweenness, mean eigenvector centrality, mean
    closeness, mean PageRank, mean rich-club coefficient, density, and the
    mean, minimum and maximum eccentricity.
    """
    def mean_of(mapping):
        # np.mean cannot consume a dict view on Python 3.
        return np.mean(list(mapping.values()))

    # reshape into matrix
    connectivity_matrix = np.reshape(connectivity_vector, (n_nodes, n_nodes))
    # convert to networkx graph
    connectivity_graph = nwx.from_numpy_matrix(connectivity_matrix)

    # convert to distance graph as some metrics need this instead.
    # Work on a float COPY: the previous code aliased connectivity_matrix,
    # so replacing zeros by eps silently altered the connectivity matrix
    # (and the caller's vector) used by the metrics further down.
    distance_matrix = np.array(connectivity_matrix, dtype=float)
    distance_matrix[distance_matrix == 0] = np.finfo(np.float32).eps
    distance_matrix = 1.0 / distance_matrix
    distance_graph = nwx.from_numpy_matrix(distance_matrix)

    # initialise vector of metrics
    metrics = np.zeros((21,))

    # 1 and 2: degree distribution
    degrees = np.sum(connectivity_matrix, axis=1)
    metrics[0] = np.mean(degrees)
    metrics[1] = np.std(degrees)

    # 3 and 4: weight distribution
    weights = np.tril(connectivity_matrix, k=-1)
    metrics[2] = np.mean(weights)
    metrics[3] = np.std(weights)

    # 5: average shortest path length
    # transform weights to distances so this makes sense
    metrics[4] = nwx.average_shortest_path_length(distance_graph, weight='weight')

    # 6: assortativity (unweighted; the former weight='None' string named
    # a non-existent edge attribute, which also counts every edge as 1)
    metrics[5] = nwx.degree_assortativity_coefficient(connectivity_graph, weight=None)

    # 7: clustering coefficient
    metrics[6] = nwx.average_clustering(connectivity_graph, weight='weight')

    # 8: transitivity
    metrics[7] = nwx.transitivity(connectivity_graph)

    # 9 & 10: local and global efficiency
    metrics[8] = np.mean(bct.efficiency_wei(connectivity_matrix, local=True))
    metrics[9] = bct.efficiency_wei(connectivity_matrix, local=False)

    # 11: Clustering coefficient
    metrics[10] = mean_of(nwx.clustering(connectivity_graph, weight='weight'))

    # 12 & 13: Betweenness centrality
    metrics[11] = mean_of(nwx.betweenness_centrality(distance_graph, weight='weight'))
    metrics[12] = mean_of(nwx.current_flow_betweenness_centrality(distance_graph, weight='weight'))

    # 14: Eigenvector centrality
    metrics[13] = mean_of(nwx.eigenvector_centrality(distance_graph, weight='weight'))

    # 15: Closeness centrality
    metrics[14] = mean_of(nwx.closeness_centrality(distance_graph, distance='weight'))

    # 16: PageRank
    metrics[15] = mean_of(nwx.pagerank(connectivity_graph, weight='weight'))

    # 17: Rich club coefficient
    metrics[16] = mean_of(nwx.rich_club_coefficient(connectivity_graph))

    # 18: Density
    metrics[17] = bct.density_und(connectivity_matrix)[0]

    # 19, 20, 21: Eccentricity, radius, diameter
    # dict() accepts both the dict (NetworkX 1.x) and the iterator of
    # (node, lengths) pairs (NetworkX 2+) returned here.
    spl_all = dict(nwx.shortest_path_length(distance_graph, weight='weight'))
    eccs = np.zeros(n_nodes,)
    for i in range(n_nodes):
        eccs[i] = np.max(list(spl_all[i].values()))
    metrics[18] = np.mean(eccs)
    metrics[19] = np.min(eccs)
    metrics[20] = np.max(eccs)

    return metrics
def main():
    """ Adds graph-derived features to the similarity and connection
    dictionaries of splits 1 to 4 and writes them back as pickles.

    For every split the pickled inputs are loaded from _PKL_DIR, difference
    features (trustors, reviews, pagerank, closeness, eigenvector) are
    added per (author, voter) pair, trust and inverse-path-length features
    are added to conn, and the results are dumped to new-sim-<i>.pkl and
    new-conn-<i>.pkl.

    Args:
      None.

    Returns:
      None.
    """
    for i in xrange(1, 5):
        print 'Reading data'
        # NOTE(review): reviews, train, validation and test are loaded but
        # not used in this function.
        trusts = load(open('%s/trusts.pkl' % _PKL_DIR, 'r'))
        reviews = load(open('%s/reviews-%d.pkl' % (_PKL_DIR, i), 'r'))
        users = load(open('%s/users-%d.pkl' % (_PKL_DIR, i), 'r'))
        train = load(open('%s/train-%d.pkl' % (_PKL_DIR, i), 'r'))
        validation = load(open('%s/validation-%d.pkl' % (_PKL_DIR, i), 'r'))
        test = load(open('%s/test-%d.pkl' % (_PKL_DIR, i), 'r'))
        sim = load(open('%s/sim-%d.pkl' % (_PKL_DIR, i), 'r'))
        conn = load(open('%s/conn-%d.pkl' % (_PKL_DIR, i), 'r'))
        print 'Generating similarity'
        avg_user = compute_avg_user(users)
        close = {}  # closeness centrality per user, computed on demand
        eigen = eigenvector_centrality(trusts)
        for author, voter in sim:
            # Users without a profile fall back to the average user.
            author_dic = users[author] if author in users else avg_user
            voter_dic = users[voter] if voter in users else avg_user
            # if any feature is nan, the derived value becomes nan and will
            # be imputed
            sim[(author, voter)]['diff_trustors'] = author_dic['num_trustors'] - \
                voter_dic['num_trustors']
            sim[(author, voter)]['diff_reviews'] = author_dic['num_reviews'] - \
                voter_dic['num_reviews']
            sim[(author, voter)]['diff_pagerank'] = author_dic['pagerank'] - \
                voter_dic['pagerank']
            # Closeness is nan for users that are not in the trust graph.
            if voter not in close:
                close[voter] = closeness_centrality(trusts, voter) if voter in trusts \
                    else nan
            if author not in close:
                close[author] = closeness_centrality(trusts, author) if author in trusts\
                    else nan
            sim[(author, voter)]['diff_close'] = close[author] - close[voter]
            if voter not in eigen:
                eigen[voter] = nan
            if author not in eigen:
                eigen[author] = nan
            # NOTE(review): this is the only feature in the similarity loop
            # written to conn instead of sim; it raises KeyError if the
            # pair is missing from conn -- confirm this is intended.
            conn[(author, voter)]['diff_eigen'] = eigen[author] - eigen[voter]
        dump(sim, open('%s/new-sim-%d.pkl' % (_PKL_DIR, i), 'w'))
        print 'Generating connection'
        paths = {}  # shortest path lengths per source user, computed on demand
        for author, voter in conn:
            conn[(author, voter)]['voter_trust'] = 1 if \
                trusts.has_edge(voter, author) else 0
            conn[(author, voter)]['author_trust'] = 1 if \
                trusts.has_edge(author, voter) else 0
            if voter not in paths and voter in trusts:
                paths[voter] = single_source_shortest_path_length( trusts, voter)
            if author not in paths and author in trusts:
                paths[author] = single_source_shortest_path_length( trusts, author)
            # Inverse path length; 0 when the source is not in the graph or
            # the target is unreachable.
            # NOTE(review): a pair with author == voter would have path
            # length 0 and divide by zero -- presumably such pairs never
            # occur; verify.
            conn[(author, voter)]['inv_from_vot_path'] = 0 if voter not in trusts \
                or author not in paths[voter] else (1.0 / float(paths[voter][author]))
            conn[(author, voter)]['inv_from_aut_path'] = 0 if author not in trusts \
                or voter not in paths[author] else (1.0 / float(paths[author][voter]))
        dump(conn, open('%s/new-conn-%d.pkl' % (_PKL_DIR, i), 'w'))
def hbase_test(): # Default return value default_return = {'nodes': [], 'edges': []} # Input sanity checks search = request.args.get('search', '') if search == None or search == "": print "SD> WARN: Search query is empty" return default_return elif isinstance(search, str): if search.isdigit(): search = int(search) else: print "SD> WARN: Search should be a digit" return default_return search_str = str(search) # Establish contact with database cluster = Cluster(contact_points=['54.219.144.56'], ) session = cluster.connect('harary') # Look for node in database community_id = session.execute( "SELECT community FROM node_community_table WHERE source = " + search_str) if len(community_id) == 0: print "SD> WARN: Could not find node " + search_str + " in database" return default_return community_str = str(community_id[0].community) print "SD> INFO: Node " + search_str + " was found in database with community " + community_str # Search for community members print "SD> INFO: Executing query: " + "SELECT * FROM node_community_table WHERE community = " + community_str + " ALLOW FILTERING" result = session.execute( "SELECT * FROM node_community_table WHERE community = " + community_str + " ALLOW FILTERING;") print "SD> INFO: Query result: " + str( len(result)) + " members were found for community " + community_str # Empty result scenario if len(result) == 0: return default_return # Extreme cases are truncated for practicality max_number_of_nodes = 2000 if len(result) > max_number_of_nodes: print "SD> WARN: Excessive number of node (%i). Something is probably wrong.." 
% len( result) result = result[0:max_number_of_nodes] node_index = 0 edge_index = 0 # Allocate the number of nodes expected_number_of_nodes = len(result) nodes = [{ 'id': '0', 'index': '0', 'label': '', 'community': 0, 'x': 0, 'y': 0, 'size': 10 } for k in range(expected_number_of_nodes)] # Sigma.js # edges = [{'id': '0', 'source':'0', 'target':'0'} for k in range(expected_number_of_nodes * expected_number_of_nodes)] # D3 edges = [{ 'source': 100, 'target': 1000, 'id': 0 } for k in range(expected_number_of_nodes * expected_number_of_nodes)] # Filter for visualization def filter(x): return len(x.target) < 50 # Map ID to linear range for D3 keys = [r.source for r in result if filter(r)] values = range(len(keys)) dictionary = dict(zip(keys, values)) # Add all nodes for node in result: if filter(node): nodes[node_index]['id'] = str(dictionary[node.source]) nodes[node_index]['index'] = str(node.source) nodes[node_index]['community'] = node.community nodes[node_index]['label'] = "Node: " + str(node.source) nodes[node_index]['x'] = random.random() nodes[node_index]['y'] = random.random() node_index = node_index + 1 if node.target != None: # Add all edges for target in node.target: if target in keys: edges[edge_index]['source'] = dictionary[node.source] edges[edge_index]['target'] = dictionary[target] edges[edge_index]['id'] = str(edge_index) edge_index = edge_index + 1 # Truncate excess nodes = nodes[0:node_index] edges = edges[0:edge_index] # Build graph from json G = json_graph.node_link_graph({ 'nodes': nodes, 'links': edges }, False, True) DiG = nx.DiGraph(G) G = nx.Graph(G) # On the fly computation of properties on manageable sizes bet_cen = nx.betweenness_centrality(G) clo_cen = nx.closeness_centrality(G) eig_cen = nx.eigenvector_centrality(G) pr = nx.pagerank(DiG, alpha=0.9) deg = G.degree() com = community.best_partition(G) for node in nodes: node['betweenness'] = bet_cen[node['id']] node['closeness'] = clo_cen[node['id']] node['eigenvector'] = eig_cen[node['id']] 
node['pagerank'] = pr[node['id']] node['degree'] = deg[node['id']] node['community'] = com[node['id']] # Return json string return json.dumps({'nodes': nodes, 'edges': edges})
arrows=False, with_labels=True, node_size=[ 200 + ((averagee_bacon - rg.node[d]['bacon']) * 50) for d in rg.nodes ], width=0.1, style='dashed', cmap=plt.get_cmap("viridis_r"), node_color=[(averagee_bacon - rg.node[d]['bacon']) for d in rg.nodes], label=seed) fig.savefig('fig/eigenvalues') fig.clear() eigenvector_centrality = nx.eigenvector_centrality(rg, max_iter=300) ord_lc = sorted(eigenvector_centrality.items(), key=itemgetter(1), reverse=True) labels = {} for k, v in ord_lc[0:5]: labels[k] = k fig = plt.figure(num=None, figsize=(15, 10), dpi=80, facecolor='w', edgecolor='k') nx.draw_networkx(rg, pos=pos_a, arrows=False,
def getEigenvectorC(self):
    """Return the eigenvector centrality of the graph given by getMainComponent()."""
    return nx.eigenvector_centrality(self.getMainComponent())
def test_eigenvector_centrality_unweighted(self):
    """Centrality of self.H (tol=1e-8) must match the values in self.G.evc."""
    scores = networkx.eigenvector_centrality(self.H, tol=1.e-08)
    for computed, expected in zip(scores.values(), self.G.evc):
        assert_almost_equal(computed, expected)
def top_n_evcentrality(graph, n=10):
    """Return the ``n`` nodes with the highest eigenvector centrality.

    The result is a list of ``(node, score)`` tuples in descending order,
    where ``score`` is the centrality formatted with two decimals.
    """
    centrality = nx.eigenvector_centrality(graph)
    # Rank on the numeric score and format afterwards: sorting the
    # formatted strings loses the order of scores that share two decimals
    # and compares text instead of numbers.
    ranked = sorted(centrality.items(), key=itemgetter(1), reverse=True)
    return [(v, '{:0.2f}'.format(c)) for v, c in ranked[:n]]
# Attach the 'group' and 'name' attributes to every node of M.
for i in M.nodes():
    M.add_node(i, group=nodes[i]['group'], name=nodes[i]['name'])

#Write out graph data in JSON file
jsonData = json_graph.node_link_data(M)
with open('data/miserables.json', 'w') as outfile:
    json.dump(jsonData, outfile, indent=4)

#Eigenvector centrality criteria, scaled so that the largest value is 1
Meigen = nx.eigenvector_centrality(M)
# The maximum is computed once instead of once per node.
_Meigen_max = max(Meigen.values())
normeigen = [float(i) / _Meigen_max for i in Meigen.values()]

#Closeness centrality
Mclose = nx.closeness_centrality(M)
normclose = Mclose.values()

#Betweenness centrality
Mbetween = nx.betweenness_centrality(M)
normbetween = Mbetween.values()

#Graph edges in list form
Medges = list(M.edges())

#Layout
pos = nx.fruchterman_reingold_layout(M, dim=2)
#TestSet
os.chdir('/home/kel/Desktop/SocialNetworkAnalysis/')

# Load the edge list and give its first three columns fixed names.
fileE = pd.read_csv("flickrEdges_adj.tsv", sep='\t')
Title = fileE.columns
print(Title)
fileE = fileE.rename(
    columns=dict(zip(Title[:3], ['Source', 'Target', 'Degree'])))

# Undirected graph from the edge list, then its directed counterpart.
Gs = nx.from_pandas_edgelist(fileE, source='Source', target='Target')
Ga = Gs.to_directed()
centrality = nx.eigenvector_centrality(Ga, max_iter=20)
# The sorted result is not stored anywhere.
sorted((v, f"{c:0.2f}") for v, c in centrality.items())

# Placeholder column; it is filled with NaN only.
fileE['Eigenvalue'] = np.nan
eigenvalues = pd.DataFrame(centrality.items())

n = len(fileE['Source'])
dataD = fileE['Degree']
# Column vectors of the Eigenvalue and Source columns, side by side.
dataE = np.array(fileE['Eigenvalue']).reshape((len(fileE['Eigenvalue']), 1))
dataS = np.array(fileE['Source']).reshape((len(fileE['Source']), 1))
Data = np.hstack((dataE, dataS))
def test_eigenvector_centrality_unweighted(self):
    """Centrality of self.H must match self.G.evc within an absolute 1e-4."""
    scores = nx.eigenvector_centrality(self.H)
    computed_values = list(scores.values())
    for computed, expected in zip(computed_values, self.G.evc):
        assert computed == pytest.approx(expected, abs=1e-4)
print degreeCentrality print "harmonic centrality" for index in range(0, 16): if (index != 11): sum = 0 for index2 in range(0, 16): if (index != index2 and index != 11 and index2 != 11): #again, don't try for vertex 11 sum += networkx.shortest_path_length(graph, index, index2) print(1 / float(sum)) / 15 else: print "0" print "eigenvector centrality" eigenvectorCentrality = networkx.eigenvector_centrality(graph) for index in range(0, 16): print eigenvectorCentrality[index] #betweenness, didn't use networkx command to allow for multiple shortest paths print "betweenness centrality" for index in range(0, 16): counter = 0 counter2 = 0 for item in shortestPaths: for item2 in item: counter2 += 1 if (index in item2): counter += 1
def get_eigen(self, n1, n2):
    """Store the eigenvector centrality of self.graph in self.eigen_centrality.

    n1 and n2 are handed to nx.eigenvector_centrality as its second and
    third arguments, i.e. the maximum number of iterations and the
    convergence tolerance.
    """
    scores = nx.eigenvector_centrality(self.graph, max_iter=n1, tol=n2)
    self.eigen_centrality = scores
def metrics(segments):
    """Calculates network metrics for play

    ``segments`` is an iterable of dicts; the optional ``'speakers'`` list
    of each segment names the characters that appear together.  Every pair
    of speakers in a segment is linked, weighted by the number of segments
    they share.  A segment with a single speaker only adds that speaker as
    a node.

    Returns a dict with the graph-level metrics (size, density, diameter,
    averagePathLength, averageDegree, averageClustering, maxDegree,
    maxDegreeIds, numConnectedComponents) and a ``'nodes'`` dict with the
    per-character metrics.  A play without any speaker yields zeros rather
    than an exception.
    """
    from itertools import combinations

    G = nx.Graph()
    weights = {}
    for seg in segments:
        speakers = seg.get('speakers', [])
        # if segment has only one speaker we add her as a node to make sure
        # she is included in the graph even if she has no connections
        if len(speakers) == 1:
            G.add_node(speakers[0])
        for source, target in combinations(speakers, 2):
            edge = tuple(sorted((source, target)))
            weights[edge] = weights.get(edge, 0) + 1
    G.add_weighted_edges_from((u, v, w) for (u, v), w in weights.items())

    size = G.number_of_nodes()
    degrees = dict(G.degree())
    # default=0 keeps an empty graph from raising ValueError.
    max_degree = max(degrees.values(), default=0)
    max_degree_ids = [n for n, d in degrees.items() if d == max_degree]

    path_lengths = [
        y for x in nx.shortest_path_length(G) for y in x[1].values() if y > 0
    ]

    wd = G.degree(weight='weight')
    cc = nx.closeness_centrality(G)
    bc = nx.betweenness_centrality(G)
    # FIXME: nx.eigenvector_centrality throws an exception with
    # https://dracor.org/api/corpora/rus/play/lermontov-strannyj-chelovek
    # we catch this here so we can still yield the rest of the metrics.
    try:
        ec = nx.eigenvector_centrality(G)
    except nx.exception.PowerIterationFailedConvergence:
        ec = {}

    nodes = {}
    for n, d in degrees.items():
        nodes[n] = {
            'degree': d,
            'weightedDegree': wd[n],
            'betweenness': bc[n],
            'closeness': cc[n]
        }
        if n in ec:
            nodes[n]['eigenvector'] = ec[n]

    return {
        'size': size,
        'density': nx.density(G),
        'diameter': max(path_lengths) if path_lengths else 0,
        'averagePathLength':
            (sum(path_lengths) / len(path_lengths)) if path_lengths else 0,
        # Both averages would divide by zero on an empty graph.
        'averageDegree': (sum(degrees.values()) / size) if size else 0,
        'averageClustering': nx.average_clustering(G) if size else 0,
        'maxDegree': max_degree,
        'maxDegreeIds': max_degree_ids,
        'numConnectedComponents': nx.number_connected_components(G),
        'nodes': nodes
    }
def compute_graph_features():
    """Compute graph-based pair features for the train and test sets.

    Reads ./data/train.csv and ./data/test.csv, builds one undirected
    weighted graph whose nodes are the text ids and whose edges are the
    (text_a_ID, text_b_ID) pairs of both files, computes per-node
    statistics (k-core, neighbours per level, frequency, PageRank,
    closeness, clustering, eigenvector centrality) and derives per-pair
    features from them.  The features are written to
    ./data/graph_features_train.csv and ./data/graph_features_test.csv.

    Edge weights come from the tf-idf distance files when both exist, else
    from earlier ensemble predictions when both exist, else they are 1.
    """
    column_names = ['row_ID', 'text_a_ID', 'text_b_ID', 'text_a_text',
                    'text_b_text', 'have_same_meaning']

    # Load data
    train = pd.read_csv("./data/train.csv", names=column_names, index_col=0)
    test = pd.read_csv("./data/test.csv", names=column_names, index_col=0)

    # Load weights for graph depending on which file is available
    if os.path.exists("./data/distance_features_train.csv") and os.path.exists("./data/distance_features_test.csv"):
        print("Use tfidf dist as weights for graph.")
        train_pred_features = pd.read_csv("./data/distance_features_train.csv")
        test_pred_features = pd.read_csv("./data/distance_features_test.csv")
        train["weight"] = train_pred_features["tfidf_dist_cosine"]
        test["weight"] = test_pred_features["tfidf_dist_cosine"]
    elif os.path.exists("./predictions/predictions_ensemble_train.csv") and os.path.exists("./predictions/predictions_ensemble_test.csv"):
        print("Use previous predictions as weights for graph.")
        train_pred_features = pd.read_csv("./predictions/predictions_ensemble_train.csv")
        test_pred_features = pd.read_csv("./predictions/predictions_ensemble_test.csv")
        train["weight"] = train_pred_features["weight"]
        test["weight"] = test_pred_features["weight"]
    else:
        print("Use uniform weights for graph.")
        train["weight"] = 1
        test["weight"] = 1

    # Hyperparameters
    max_freq = 50
    max_neighbors = 30
    n_k_cores = 12
    max_level = 3

    # Start computation
    all_question_ids = pd.concat([train["text_a_ID"], train["text_b_ID"],
                                  test["text_a_ID"], test["text_b_ID"]])
    unique_question_ids = all_question_ids.unique()

    def shortestPathShortness(row):
        # Inverse weighted length of the shortest path between the two
        # texts once their direct edge is taken out; 0 if none exists.
        a, b = row['text_a_ID'], row['text_b_ID']
        # Self-pairs have no edge left (self-loops are stripped when the
        # graph is built), so remove_edge would raise for them.
        had_edge = g.has_edge(a, b)
        if had_edge:
            g.remove_edge(a, b)
        try:
            length = nx.shortest_path_length(g, a, b, weight="weight")
            sps = 1 / length if length != 0 else 0
        except nx.NetworkXNoPath:
            sps = 0
        finally:
            # Always restore the edge so an error cannot corrupt the graph.
            if had_edge:
                g.add_edge(a, b, weight=row["weight"])
        return sps

    def get_neighbors(qid):
        # One list of node ids per hop distance 1..max_level.  The cutoff
        # follows max_level; the former fixed cutoff of 2 left every
        # level-3 list empty.
        levels = nx.single_source_shortest_path_length(g, qid, cutoff=max_level)
        by_level = [[] for _ in range(max_level)]
        for node, level in levels.items():
            if level >= 1:
                by_level[level - 1].append(node)
        return by_level

    print("Build Graph...")
    nodes = pd.concat([train.text_a_ID, train.text_b_ID,
                       test.text_a_ID, test.text_b_ID]).values
    edges = pd.concat([train[["text_a_ID", "text_b_ID", "weight"]],
                       test[["text_a_ID", "text_b_ID", "weight"]]]).values
    g = nx.Graph()
    g.add_nodes_from(nodes)
    for e in edges:
        g.add_edge(int(e[0]), int(e[1]), weight=e[2])
    # Graph.selfloop_edges() was removed from newer NetworkX releases in
    # favour of the module-level function; support both.  The edges are
    # snapshotted before removal.
    if hasattr(nx, "selfloop_edges"):
        selfloops = list(nx.selfloop_edges(g))
    else:
        selfloops = list(g.selfloop_edges())
    g.remove_edges_from(selfloops)

    print("Compute question specific features...")
    df_questions = pd.DataFrame(unique_question_ids, columns=["qid"])
    df_questions.index = df_questions.qid

    print("--> Compute k cores...")
    df_questions["k_core"] = 0
    for i in range(2, n_k_cores):
        print("\t--> core {}".format(i))
        k_core = nx.k_core(g, k=i).nodes()
        df_questions.loc[df_questions.qid.isin(k_core), "k_core"] = i

    print("--> Compute neighbors...")
    neighbors = df_questions.qid.apply(get_neighbors)
    for i in range(1, max_level + 1):
        df_questions["neighbors" + str(i)] = neighbors.apply(lambda x: set(x[i - 1]))

    print("--> Compute question frequency...")
    df_questions["frequency"] = all_question_ids.value_counts()

    print("--> Compute page rank...")
    pageranks = nx.pagerank(g, weight='weight')
    df_questions["page_rank"] = df_questions.qid.apply(lambda qid: pageranks[qid])

    print("--> Compute closeness centrality...")
    closeness_centrality = nx.closeness_centrality(g)
    df_questions["closeness_centrality"] = df_questions.qid.apply(lambda qid: closeness_centrality[qid])

    print("--> Compute clustering...")
    clustering = nx.clustering(g, weight='weight')
    df_questions["clustering"] = df_questions.qid.apply(lambda qid: clustering[qid])

    print("--> Compute eigenvector centrality...")
    eigenvector_centrality = nx.eigenvector_centrality(g, weight='weight')
    df_questions["eigenvector_centrality"] = df_questions.qid.apply(lambda qid: eigenvector_centrality[qid])

    def add_min_max(df, df_features, column, cap=None):
        # Add <column>_min / <column>_max: the smaller and larger per-node
        # value of each pair, optionally capped at `cap`.
        pair = pd.DataFrame({
            "a": df_questions.loc[df.text_a_ID, column].values,
            "b": df_questions.loc[df.text_b_ID, column].values,
        }, index=df.index)
        smaller = pair.min(axis=1)
        larger = pair.max(axis=1)
        if cap is not None:
            smaller = smaller.clip(upper=cap)
            larger = larger.clip(upper=cap)
        df_features[column + "_min"] = smaller
        df_features[column + "_max"] = larger

    def preprocess(df):
        # Build the feature frame (same index as df) for one data set.
        df_features = pd.DataFrame(index=df.index)

        print("--> Compute shortest path shortness...")
        df_features["shortest_path_shortness"] = df.apply(shortestPathShortness, axis=1)

        print("--> Compute frequency features...")
        add_min_max(df, df_features, "frequency", cap=max_freq)

        print("--> Compute neighbor features...")
        for i in range(1, max_level + 1):
            column = "neighbors" + str(i)
            neighbors_a = df_questions.loc[df.text_a_ID, column].values
            neighbors_b = df_questions.loc[df.text_b_ID, column].values
            common = pd.Series(
                [len(a & b) for a, b in zip(neighbors_a, neighbors_b)],
                index=df.index)
            smallest = pd.Series(
                [min(len(a), len(b)) for a, b in zip(neighbors_a, neighbors_b)],
                index=df.index)
            df_features["common_neighbors" + str(i)] = common.clip(upper=max_neighbors)
            # The small constant avoids a division by zero for isolated nodes.
            df_features["common_neighbors_ratio" + str(i)] = df_features["common_neighbors" + str(i)] / (smallest + 0.00001)

        print("--> Compute k-core features...")
        add_min_max(df, df_features, "k_core")

        print("--> Compute page rank features...")
        add_min_max(df, df_features, "page_rank", cap=100)

        print("--> Compute closeness centrality features...")
        add_min_max(df, df_features, "closeness_centrality", cap=100)

        print("--> Compute clustering features...")
        add_min_max(df, df_features, "clustering", cap=100)

        print("--> Compute eigenvector centrality...")
        add_min_max(df, df_features, "eigenvector_centrality", cap=100)

        return df_features

    print("Compute train features...")
    train_features = preprocess(train)
    print("Compute test features...")
    test_features = preprocess(test)

    print("Store features...")
    train_features.to_csv("./data/graph_features_train.csv", index=False)
    test_features.to_csv("./data/graph_features_test.csv", index=False)
def calculateEigenvector(graph):
    """Compute the weighted eigenvector centrality of ``graph``.

    Parameters
    ----------
    graph : networkx graph
        Graph whose edges may carry a ``'weight'`` attribute; that attribute
        is the one handed to networkx as the edge weight.

    Returns
    -------
    The value returned by ``nx.eigenvector_centrality`` for ``graph``.
    """
    return nx.eigenvector_centrality(graph, weight='weight')
def test_maxiter(self):
    """With ``max_iter=0`` the power iteration must fail to converge."""
    # Build the fixture outside the ``raises`` block so that only the call
    # under test can satisfy the expected exception.
    G = nx.path_graph(3)
    with pytest.raises(nx.PowerIterationFailedConvergence):
        nx.eigenvector_centrality(G, max_iter=0)
#%% Write out clustering results
print('Writing out clustering results ' + str(datetime.now()))

# Fix the node order once and fetch the uid lookup a single time; the
# original rebuilt the full attribute dict for every node in the graph.
node_list = list(G_gn.nodes())
node_uids = nx.get_node_attributes(G_gn, 'uid')

clustering_results_d = {
    'nodes': node_list,
    # NOTE(review): assumes cluster_labels is ordered like G_gn.nodes() —
    # it is produced outside this cell; confirm.
    'clusters': cluster_labels,
    'uids': [node_uids[n] for n in node_list],
}
clustering_results = pd.DataFrame(clustering_results_d)

# Per-node degree and the number of uids attached to each node.
clustering_results['nodeDegree'] = clustering_results['nodes'].apply(
    lambda x: G_gn.degree(x))
clustering_results['frequency'] = clustering_results['uids'].apply(len)

# Centrality measures, each a node -> score mapping over G_gn.
nodeDegreeCentrality = nx.degree_centrality(G_gn)
nodeBetweennessCentrality = nx.betweenness_centrality(G_gn)
nodeLoadCentrality = nx.load_centrality(G_gn)
nodeEigenvectorCentrality = nx.eigenvector_centrality(G_gn)

clustering_results['nodeDegreeCentrality'] = clustering_results[
    'nodes'].apply(lambda x: nodeDegreeCentrality[x])
clustering_results['nodeBetweennessCentrality'] = clustering_results[
    'nodes'].apply(lambda x: nodeBetweennessCentrality[x])
clustering_results['nodeLoadCentrality'] = clustering_results[
    'nodes'].apply(lambda x: nodeLoadCentrality[x])
clustering_results['nodeEigenvectorCentrality'] = clustering_results[
    'nodes'].apply(lambda x: nodeEigenvectorCentrality[x])

# One extra column per entry of moduleResultsDict, looked up by node.
for k, v in moduleResultsDict.items():
    clustering_results['minClique=' + str(k)] = clustering_results['nodes'].map(v)

outpath = outputbasepath + basename + ' clustering results' + '.csv'
clustering_results.to_csv(outpath, encoding='utf-8')

#%%
def eigenvector_centrality(g, weight=None):
    """Rank the nodes of ``g`` by eigenvector centrality, highest first.

    Parameters
    ----------
    g : networkx graph
    weight : passed straight through as the ``weight`` argument of
        ``nx.eigenvector_centrality`` (default ``None``).

    Returns
    -------
    list of ``(node, score)`` tuples sorted by descending score.
    """
    scores = nx.eigenvector_centrality(g, weight=weight)
    ranking = list(scores.items())
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking
import networkx as nx
import os

# Build an undirected graph from a whitespace-separated edge list and write
# the eigenvector centrality of every node to a text file.

G = nx.Graph()  # Start from an empty graph with no nodes and no edges.

data_path = os.path.join("data.txt")  # Edge list, relative to the working directory.
with open(data_path) as p:
    next(p, None)  # Skip the header row; an empty file simply yields no edges.
    for line in p:
        s = line.split()  # Columns: source node, target node, integer weight.
        if not s:
            continue  # Ignore blank lines instead of failing on s[0].
        G.add_edge(s[0], s[1], weight=int(s[2]))

# NOTE(review): no ``weight=`` argument is passed here, so the edge weights
# loaded above are not used by this call — confirm that is intended.
eigen_vector_centrality = nx.eigenvector_centrality(G, max_iter=10000)

# Write a header line followed by one "node: score" line per node.
with open("Updated_Eigen_Vector_Centrality_Output.txt", "w") as f:
    f.write("\t\t\t\t\t\t\t\t\t************************************\t\t\tEigen Vector Centrality Output\t\t\t************************************"+"\n")
    for k, v in eigen_vector_centrality.items():
        f.write(str(k)+": "+str(v)+"\n")
tupl = sorted_Cent[ii] print tupl clsCent = nx.closeness_centrality(G_original) sorted_Cls = sorted(clsCent.items(), key=operator.itemgetter(1), reverse=True) for ii in range(number): tupl = sorted_Cls[ii] print tupl btwCent = nx.betweenness_centrality(G_original) sorted_btw = sorted(btwCent.items(), key=operator.itemgetter(1), reverse=True) for ii in range(number): tupl = sorted_btw[ii] print tupl eigCent = nx.eigenvector_centrality(G_original) sorted_eig = sorted(eigCent.items(), key=operator.itemgetter(1), reverse=True) for ii in range(number): tupl = sorted_eig[ii] print tupl katzCent = nx.katz_centrality_numpy(G_original) sorted_katz = sorted(katzCent.items(), key=operator.itemgetter(1), reverse=True) for ii in range(number): tupl = sorted_katz[ii] print tupl nx.draw(G_original, nodelist=clsCent.keys(),
b_k1 = np.dot(A, b_k) # calculate the norm b_k1_norm = np.linalg.norm(b_k1) b_k_next = b_k1 / b_k1_norm if (np.sum(abs(b_k_next - b_k)) < EPSILON * len(b_k)): break b_k = b_k_next num_simulations -= 1 print("left iters: ", num_simulations) return b_k_next eigenvector_centrality = power_iteration(subG, num_simulations=30) compare(list(zip(G.nodes, eigenvector_centrality)), list(nx.eigenvector_centrality(subG, weight='weight').items()), name="eigenvector") #,size=subSize) # In[26]: """ Clustering Coefficient The global clustering coefficient is the number of closed triplets (or 3 x triangles) over the total number of triplets (both open and closed). The local clustering coefficient is the proportion of links between the vertices within its neighbourhood divided by the number of links that could possibly exist between them. Average clustering coefficient is mean of local clusterings """ unsubG = nx.to_undirected(subG) clustering_coeffs = {}
# Mean clustering coefficient per degree value.
# NOTE(review): degrees come from G while clustering is computed on F; both
# graphs are defined outside this fragment.
deg = pd.Series(nx.degree(G))
cc = pd.Series({e: nx.clustering(F, e) for e in F})
deg_cc = pd.concat([deg, cc], axis=1)
deg_cc.columns = ("Degree", "CC")

# Scatter plot of the per-degree mean CC on a logarithmic degree axis.
mean_cc_by_degree = deg_cc.groupby("Degree").mean().reset_index()
mean_cc_by_degree.plot(kind="scatter", x="Degree", y="CC", s=100)
plt.xscale("log")
plt.ylim(ymin=0)
plt.grid()
dzcnapy.plot("deg_cc")

# A study of centralities
dgr = nx.degree_centrality(G)
clo = nx.closeness_centrality(G)
har = nx.harmonic_centrality(G)
eig = nx.eigenvector_centrality(G)
bet = nx.betweenness_centrality(G)
pgr = nx.pagerank(G)
hits = nx.hits(G)

# Pair every measure with its column label so the two cannot drift apart;
# hits[1] is labelled "Authorities" and hits[0] "Hubs".
labelled_measures = [
    ("Authorities", hits[1]),
    ("Eigenvector", eig),
    ("PageRank", pgr),
    ("Harmonic Closeness", har),
    ("Closeness", clo),
    ("Hubs", hits[0]),
    ("Degree", dgr),
    ("Betweenness", bet),
]
centralities = pd.concat(
    [pd.Series(scores) for _, scores in labelled_measures], axis=1)
centralities.columns = tuple(label for label, _ in labelled_measures)

# Scale harmonic closeness by the number of rows in the table.
centralities["Harmonic Closeness"] /= centralities.shape[0]

# Calculate the correlations for each pair of centralities
c_df = centralities.corr()
# Move the computed degrees into a pandas DataFrame and work with them there
'''
degrees = pd.DataFrame(list(g.degree()), columns=("country", "degree")).set_index("country")
print(degrees.sort_values("degree", ascending=False).head(10))
'''

# Compute and print the clustering coefficients
'''
cc = nx.clustering(g)
for k in cc:
    print("%s %s"%(k, cc[k]))
'''

# Compute the list of components
'''
comps = list(nx.connected_components(g))
for k in comps:
    print(k)
'''

# Print the computed centrality results: first the degree centrality of a
# single node, then the full result of each remaining measure in turn.
_degree_centrality = nx.degree_centrality(g)
print("North Korea : %.2f" % _degree_centrality["North Korea"])
for _measure in (nx.closeness_centrality,
                 nx.betweenness_centrality,
                 nx.eigenvector_centrality):
    print(_measure(g))
def construct_network(portcalls: pd.DataFrame) -> nx.DiGraph:
    """
    Build the directed port network and store node measures on the graph.

    Every consecutive pair of rows of one ship becomes a directed edge from
    the earlier port to the later port. Each edge carries ``duration``
    (arrival minus the previous departure), ``weight`` (the ship's number of
    port calls minus one) and ``distance`` (``1 / weight``).

    Node attributes written to the graph (names exactly as stored):
     - 'degree', 'in_degree', 'out_degree'
     - 'strength', 'in_strength', 'out_strength' (degrees weighted by
       'weight')
     - 'closeness', 'betweenness', 'eigenvector' (unweighted)
     - 'closenss_weighted' (sic), 'betweenness_weighted',
       'eigenvectors_weighted' (sic)

    These node measures can be obtained as follows:
    > pd.DataFrame.from_dict(
        dict(network_base.nodes(data=True)),
        orient='index')

    Parameters
    ----------
    portcalls : pd.DataFrame
        Must contain the columns 'port', 'arrival', 'departure' and 'ship'.
        Rows are used in the order given within each ship.

    Returns
    -------
    nx.DiGraph

    Raises
    ------
    AssertionError
        If a required column is missing, a departure is not after its
        arrival, or an arrival precedes the same ship's previous departure.
    ValueError
        Raised by ``pd.concat`` when no ship has at least two port calls.
    """
    assert 'port' in portcalls.columns
    assert 'arrival' in portcalls.columns
    assert 'departure' in portcalls.columns
    assert 'ship' in portcalls.columns
    assert all((portcalls['departure'] - portcalls['arrival']).dropna()
               > pd.Timedelta(0))

    objs = list()
    for _, ship_df in portcalls.groupby('ship'):
        n_voyages = len(ship_df) - 1
        if n_voyages < 1:
            # A single port call yields no voyage; previously this case
            # raised ZeroDivisionError when computing 1 / (len - 1).
            continue

        previous_departure = ship_df['departure'].shift(1)
        duration = ship_df['arrival'] - previous_departure
        assert all(duration.dropna() >= pd.Timedelta(0)), (
            ship_df['arrival'], previous_departure, duration)

        # The first row has no predecessor, so its NaN source/duration is
        # removed by dropna().
        obj = pd.DataFrame({
            'source': ship_df['port'].shift(1),
            'target': ship_df['port'],
            'duration': duration,
            'weight': n_voyages,
            'distance': 1 / n_voyages,
        }).dropna()
        objs.append(obj)
    edgelist = pd.concat(objs)
    assert all(edgelist['duration'] >= pd.Timedelta(0))

    # Get graph
    G = nx.from_pandas_edgelist(edgelist, edge_attr=True,
                                create_using=nx.DiGraph)

    # Degree and strength (weight-summed degree) measures.
    nx.set_node_attributes(G, dict(G.degree), 'degree')
    nx.set_node_attributes(G, dict(G.in_degree), 'in_degree')
    nx.set_node_attributes(G, dict(G.out_degree), 'out_degree')
    nx.set_node_attributes(G, dict(G.degree(weight='weight')), 'strength')
    nx.set_node_attributes(G, dict(G.in_degree(weight='weight')),
                           'in_strength')
    nx.set_node_attributes(G, dict(G.out_degree(weight='weight')),
                           'out_strength')

    # Unweighted centralities.
    nx.set_node_attributes(
        G, nx.closeness_centrality(G, wf_improved=False), 'closeness')
    nx.set_node_attributes(
        G, nx.betweenness_centrality(G, normalized=False), 'betweenness')
    nx.set_node_attributes(
        G, nx.eigenvector_centrality(G, max_iter=100_000), 'eigenvector')

    # Weighted centralities. The attribute names 'closenss_weighted' and
    # 'eigenvectors_weighted' are misspelt but kept as-is because callers
    # may already read them.
    nx.set_node_attributes(
        G,
        nx.closeness_centrality(G, distance='distance', wf_improved=False),
        'closenss_weighted')
    # NOTE(review): betweenness is given 'weight' rather than 'distance' as
    # its path weight, unlike closeness above — confirm this is intended.
    nx.set_node_attributes(
        G,
        nx.betweenness_centrality(G, weight='weight', normalized=False),
        'betweenness_weighted')
    nx.set_node_attributes(
        G,
        nx.eigenvector_centrality(G, weight='weight', max_iter=100_000),
        'eigenvectors_weighted')
    return G
def buildSimpleNetwork(df):
    """Build a directed district network and tabulate per-district measures.

    Each row of ``df`` becomes an edge ``hdistrict_id -> sdistrict_id`` in
    ``G`` (and the reversed edge in ``GI``) with attributes ``weight``
    (``counts``) and ``distance`` (``1.0 / counts``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``hdistrict_id``, ``sdistrict_id`` and
        ``counts``. A boolean column ``check`` (True where both ids are
        equal) is added to ``df`` in place, as in the original code.

    Returns
    -------
    pandas.DataFrame
        One row per district with the columns 'eigen', 'righteigen',
        'indegree', 'outdegree', 'closeness', 'betweeness', 'outgoing',
        'incoming', 'clustering', 'outgoing_noself', 'incoming_noself'
        (where a value exists for that district) and 'district_id'.
        Districts that occur only as ``sdistrict_id`` get a row as well;
        previously they caused a KeyError.
    """
    ddf = {}

    def mapp(dic, name):
        # Store every value under its district, creating the district's row
        # on demand so keys outside ``hdistrict_id`` do not raise KeyError.
        for key, value in dic.items():
            ddf.setdefault(key, {})[name] = value

    G = nx.DiGraph()
    GI = nx.DiGraph()
    nodes = df.hdistrict_id.unique()
    for n in nodes:
        ddf[n] = {}
    names = []
    dics = []
    G.add_nodes_from(nodes)
    for index, row in df.iterrows():
        # Keyword attributes are accepted by both old and current networkx;
        # a positional attribute dict is not.
        attrs = {'weight': row.counts, 'distance': 1.0 / row.counts}
        G.add_edge(row.hdistrict_id, row.sdistrict_id, **attrs)
        GI.add_edge(row.sdistrict_id, row.hdistrict_id, **attrs)

    dics.append(nx.eigenvector_centrality(G, weight='weight'))
    names.append('eigen')
    # Same measure on the graph with every edge reversed.
    dics.append(nx.eigenvector_centrality(GI, weight='weight'))
    names.append('righteigen')
    dics.append(nx.in_degree_centrality(G))
    names.append('indegree')
    dics.append(nx.out_degree_centrality(G))
    names.append('outdegree')
    dics.append(nx.closeness_centrality(G, distance='distance'))
    names.append('closeness')
    dics.append(nx.betweenness_centrality(G, weight='weight'))
    names.append('betweeness')

    # Total counts leaving / entering each district.
    dics.append(df.groupby('hdistrict_id')['counts'].sum().to_dict())
    names.append('outgoing')
    dics.append(df.groupby('sdistrict_id')['counts'].sum().to_dict())
    names.append('incoming')

    # Directed clustering: fraction of ordered pairs of out-neighbours
    # (excluding the node itself) that are connected by an edge.
    dic = {}
    for n in nodes:
        # list(...) works whether neighbors() returns a list or an iterator;
        # filtering avoids ValueError when n has no self-loop.
        neigh = [m for m in G.neighbors(n) if m != n]
        N = len(neigh)
        if N < 2:
            # No neighbour pairs exist; avoid dividing by zero.
            dic[n] = 0.0
            continue
        s = 0.0
        for i, a in enumerate(neigh):
            for b in neigh[i + 1:]:
                s += G.has_edge(a, b)
                s += G.has_edge(b, a)
        dic[n] = s / (N * (N - 1))
    dics.append(dic)
    names.append('clustering')

    # Same totals again, ignoring flows from a district to itself.
    df['check'] = df.hdistrict_id == df.sdistrict_id
    df2 = df[~df['check']]
    dics.append(df2.groupby('hdistrict_id')['counts'].sum().to_dict())
    names.append('outgoing_noself')
    dics.append(df2.groupby('sdistrict_id')['counts'].sum().to_dict())
    names.append('incoming_noself')

    for values, name in zip(dics, names):
        mapp(values, name)

    ddf = pd.DataFrame.from_dict(ddf, orient='index')
    ddf['district_id'] = ddf.index
    return ddf