def compare_pagerank_algorithms(graph_file_name):
    algo_name = ["PageRank-DOK", "PageRank-CSR", "PageRank-NetworkX"]
    algo_fns = [construct_sparse_graph_dictionary_of_keys,
                construct_sparse_graph_compressed_sparse_row,
                construct_sparse_graph_networkx]
    for i in range(len(algo_name)):
        print "Testing:", algo_name[i]
        start_time = time.time()
        G = algo_fns[i](graph_file_name)
        end_time = time.time()
        time_for_graph_construction = end_time - start_time
        start_time = time.time()
        if algo_name[i] == "PageRank-NetworkX":
            nx.pagerank(G)
        else:
            compute_PageRank(G)
        end_time = time.time()
        time_for_pagerank_computation = end_time - start_time
        total_time = time_for_graph_construction + time_for_pagerank_computation
        print "Time for graph, page rank and total", time_for_graph_construction, time_for_pagerank_computation, total_time
def process_data():
    file = 'data/Homo_Sapiens/EVEX_relations_9606.tab'
    t1 = time.time()
    g = import_graph(file)
    t2 = time.time()
    print 'load relations', t2 - t1
    t = transitiveness_graph(g)
    t3 = time.time()
    print 'find transitive relations', t3 - t2
    # plot(g, 'relations')
    pr = nx.pagerank(t)
    t4 = time.time()
    print 'pagerank', t4 - t3
    for node in pr:
        t.node[node]['confirming_weight'] = pr[node]
    t5 = time.time()
    print 'write pr into graph', t5 - t4
    # plot(t, 'good_confirming_relations', 'confirming_weight')
    t_rev = reverse(t)
    t6 = time.time()
    print 'reverse t', t6 - t5
    pr_rev = nx.pagerank(t_rev)
    t7 = time.time()
    print 'pagerank', t7 - t6
    for node in pr:
        t.node[node]['predicting_weight'] = pr_rev[node]
    t8 = time.time()
    print 'write pr into graph', t8 - t7
    # plot(t, 'good_predicting_relations', 'predicting_weight')
    save_data(g, 'data/g')
    save_data(t, 'data/t')
    save_data(pr, 'data/conf_pr')
    save_data(pr_rev, 'data/pre_pr')
def create_schedule_graph(seasons, teams):
    # build a directed graph of teams that played each other so we can compute a PageRank
    # teams = [t[1] for t in teams[1:]]
    t_lookup = {int(t[0]): t[1] for t in teams[1:]}
    print teams
    teams = [int(t[0]) for t in teams[1:]]
    pr_hist = {}
    pr_w_hist = {}
    for year in years:
        G = nx.DiGraph()
        G.add_nodes_from(teams)
        G_w = G.copy()
        games = seasons[np.where((seasons['Season'] == year))]
        for game in games:
            # add a directional endorsement from the losing team to the winning team
            G.add_weighted_edges_from([(game['Lteam'], game['Wteam'], 1)])
            # weight by win % squared
            G_w.add_weighted_edges_from([(game['Lteam'], game['Wteam'], (game['Wscore'] / game['Lscore']) ** 2)])
        pr = nx.pagerank(G, alpha=0.9)
        pr_w = nx.pagerank(G_w, alpha=0.9)
        ranks = []
        ranks_w = []
        for r in pr:
            ranks.append((t_lookup[r], pr[r]))
        for r in pr_w:
            ranks_w.append((t_lookup[r], pr_w[r]))
        pr_hist[year] = pr
        pr_w_hist[year] = pr_w
        sorted_pr = sorted(ranks, key=lambda tup: tup[1], reverse=True)
        sorted_pr_w = sorted(ranks_w, key=lambda tup: tup[1], reverse=True)
    return pr_hist, pr_w_hist
def calc():
    filepath = "/Users/windwild/Google Drive/CUHK/sina_data/user_rel.csv"
    G = nx.DiGraph()
    fp = open(filepath, "r")
    fp.readline()
    array_list = {}
    for i in range(0, 10):
        array_list['fui'] = {}
        line = fp.readline()
        line_arr = line.split('"')
        uid = line_arr[0][:-1]
        line = line_arr[1]
        print line
        line = line.replace("u'", "'")
        print line
        items = demjson.decode(line)
        for key in items:
            array_list[key] = items[key]
        # print items['fui']
        print uid, i
        for follow in array_list['fui']:
            G.add_edge(uid, follow)
    fp.close()
    print nx.pagerank(G)
def _ppage_rank(self, u, v):
    personal = {nid: 0 for nid in self.IG.node}
    personal[u] = 1.0
    r_uv = nx.pagerank(self.IG, personalization=personal).get(v)
    personal[u] = 0
    personal[v] = 1.0
    r_vu = nx.pagerank(self.IG, personalization=personal).get(u)
    return r_uv + r_vu
def stats(self, g, degree, pagerank, bc):
    """Compute the requested stats and return as a dict."""
    options = self.options
    stats = {}
    if options.partial:
        seen = self.seen
        empty = self.empty
        nonempty_seen = [user for user in seen.keys() if user not in empty]
    # create degree CDF
    if degree:
        if options.partial:
            # The way below for computing degree only considers those for which
            # we have all the data.
            degree = [seen[user] for user in seen.keys()]
        else:
            # The method below considers all nodes, including those for which
            # we may not have all the data. Use w/caution on partial data sets.
            degree = nx.degree(g).values()
        stats["degree"] = {"type": "array", "values": degree}
    # compute PageRank. Note: we have to ignore empties.
    if pagerank:
        start = time.time()
        if options.partial:
            pagerank_dict = nx.pagerank(g)
            nonempty_seen = [user for user in seen.keys() if user not in empty]
            pagerank = ([pagerank_dict[user] for user in nonempty_seen])
        else:
            # Assumption: no isolated nodes
            pagerank = nx.pagerank(g).values()
        duration = time.time() - start
        print "time to gen pagerank: %0.3f sec" % duration
        # print pagerank
        stats["pagerank"] = {"type": "array", "values": pagerank}
    # compute betweenness centrality - should empties get added back to CDF?
    if bc:
        start = time.time()
        bc_dict = nx.betweenness_centrality(g)
        if options.partial:
            bc = ([bc_dict[user] for user in nonempty_seen])
        else:
            bc = bc_dict.values()
        duration = time.time() - start
        print "time to gen betweenness centrality: %0.3f sec" % duration
        stats["bc"] = {"type": "array", "values": bc}
    return stats
def train_weight(self, doc):
    self.type = 1
    self.key_sentences = []
    self.key_weight = []
    (self.sentences, self.words_all_filters, weight) = self.seg.segment_sentences_weight(text=doc)
    # print doc['title']
    (title) = self.seg.segment_sentence(sentence=doc['title'])
    source = self.words_all_filters
    sim_func = self._get_similarity_standard
    sentences_num = len(source)
    self.graph = np.zeros((sentences_num, sentences_num))
    # similarity of each sentence to the title, scaled by the sentence weight
    weights = []
    summary = 0
    for x in xrange(sentences_num):
        lanlan = sim_func(source[x], title[0])
        w = weight[x] * lanlan
        weights.append(x)
        weights.append(w)
        summary += w
    if summary != 0:
        dicts = {weights[i]: weights[i + 1] / summary for i in range(0, len(weights), 2)}
    for x in xrange(sentences_num):
        for y in xrange(x, sentences_num):
            similarity = sim_func(source[x], source[y])
            self.graph[x, y] = similarity
            self.graph[y, x] = similarity
    nx_graph = nx.from_numpy_matrix(self.graph)
    if summary != 0:
        scores = nx.pagerank(G=nx_graph, personalization=dicts)
    else:
        scores = nx.pagerank(G=nx_graph)
    sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    for index, _ in sorted_scores:
        self.key_sentences.append(self.sentences[index])
        self.key_weight.append(weight[index])
def pagerank(self, edge_weights={}, context=None, context_weight=10):
    G = self.graphs.unify(edge_weights)
    if not context:
        return nx.pagerank(G)
    else:
        weights = {}
        for k in G.nodes():
            weights[k] = 1
        weights[context] = context_weight
        return nx.pagerank(G, personalization=weights)
def test_pagerank(self):
    G = self.G
    p = networkx.pagerank(G, alpha=0.9, tol=1.e-08)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    nstart = dict((n, random.random()) for n in G)
    p = networkx.pagerank(G, alpha=0.9, tol=1.e-08, nstart=nstart)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
def team_strength(winner_losers):
    games_and_weights = defaultdict(int)
    for winner, loser, weight in winner_losers:
        games_and_weights[winner, loser] += weight
    win_graph = nx.DiGraph()
    loss_graph = nx.DiGraph()
    for (winner, loser), weight in games_and_weights.iteritems():
        win_graph.add_edge(loser, winner, weight=weight)
        loss_graph.add_edge(winner, loser, weight=weight)
    loss_ranks = nx.pagerank(loss_graph)
    return {k: v - loss_ranks[k] for k, v in nx.pagerank(win_graph).iteritems()}
def run(edges, show=False):
    G = nx.DiGraph()
    # G.add_weighted_edges_from([('A','B',0.5),('A','C',0.5)])
    G.add_edges_from(edges)
    if show:
        nx.draw(G, pos=nx.spring_layout(G))
        plt.show()
    nx.write_dot(G, './graph.dot')  # dot -n -Tpng graph.dot >graph.png
    # print nx.hits(G, max_iter=10**3)  # tol=1e-4
    print nx.pagerank(G)
def test_pagerank(self):
    G = self.G
    p = networkx.pagerank(G, alpha=0.9, tol=1.0e-08)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    nstart = dict((n, random.random()) for n in G)
    p = networkx.pagerank(G, alpha=0.9, tol=1.0e-08, nstart=nstart)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    assert_raises(networkx.NetworkXError, networkx.pagerank, G, max_iter=0)
def build_graph():
    pair_list = TwitterUser.get_top_100_pair()
    DG = nx.DiGraph()
    DG.add_edges_from([(foer, twitter_user) for twitter_user, foer in pair_list])
    betweenness = nx.betweenness_centrality(DG)
    closeness = nx.closeness_centrality(DG)
    edge_betweenness = nx.edge_betweenness(DG)
    clustering_co = nx.clustering(nx.Graph(DG))
    page_rank = nx.pagerank(DG)
    for twitter_id in DG.nodes():
        t = TwitterUser.get_by_id(twitter_id)
        node = DG.node[twitter_id]
        node['user_id'] = t.user_id
        node['label'] = t.scrn_name
        node['follower_count'] = t.foer_cnt
        node['friend_count'] = t.friend_cnt
        node['status_count'] = t.status_cnt
        node['location'] = t.location
        node['verified'] = t.verified
        node['twitter_age'] = (date.today() - t.created_at).days
        node['daily_tweet'] = t.status_cnt * 1.0 / node['twitter_age']
        node['indegree'] = len([(id, foer) for id, foer in pair_list if id == twitter_id])
        node['outdegree'] = len([(id, foer) for id, foer in pair_list if foer == twitter_id])
        node['cluster'] = clustering_co[twitter_id]
        node['betweenness'] = betweenness[twitter_id]
        node['closeness'] = closeness[twitter_id]
        node['page_rank'] = page_rank[twitter_id]
    for out_n, in_n in DG.edges():
        DG[out_n][in_n]['edge_betweenness'] = edge_betweenness[(out_n, in_n)]
    return DG
def find_rolesX(community):
    pr = nx.pagerank(community)
    pr_vals = pr.values()
    m, sd = mean(pr_vals), std(pr_vals)
    leaders = [(n, p) for n, p in pr.items() if p > m + 1 * sd]
    outermosts = [(n, p) for n, p in pr.items() if p < m - 1 * sd]
    return leaders, outermosts
def test_personalization(self):
    G = networkx.complete_graph(4)
    personalize = {0: 1, 1: 1, 2: 4, 3: 4}
    answer = {0: 0.23246732615667579, 1: 0.23246732615667579,
              2: 0.267532673843324, 3: 0.2675326738433241}
    p = networkx.pagerank(G, alpha=0.85, personalization=personalize)
    for n in G:
        assert_almost_equal(p[n], answer[n], places=4)
def test_incomplete_personalization(self):
    G = networkx.complete_graph(4)
    personalize = {3: 1}
    answer = {0: 0.22077931820379187, 1: 0.22077931820379187,
              2: 0.22077931820379187, 3: 0.3376620453886241}
    p = networkx.pagerank(G, alpha=0.85, personalization=personalize)
    for n in G:
        assert_almost_equal(p[n], answer[n], places=4)
def test1():
    f = open('Results/relation_top5.csv', 'rb')
    G = nx.read_adjlist(f, delimiter=',')
    x = nx.pagerank(G, alpha=0.9)
    sort_x = sorted(x.items(), key=lambda item: item[1], reverse=True)
    for a1, a2 in sort_x:
        print(str(a1) + ' : ' + str(a2))
def return_summary(text):
    sent_list = nltk.tokenize.sent_tokenize(text)
    # deletes sentences that are only made of punctuation
    sent_list = [sent for sent in sent_list if checkValidSent(sent)]
    # makes a list of paragraphs - used to count the number of paragraphs
    pg = text.splitlines(0)
    pg = [par for par in pg if par != '']
    baseline = len(text)
    # if there are too many sentences, pick 150 at random
    if len(sent_list) > 150:
        sent_list = random.sample(sent_list, 150)
        baseline = sum([len(sent) for sent in sent_list])
    # makes graph to use for pagerank
    text_graph = buildGraph(sent_list)
    sent_scores = nx.pagerank(text_graph, weight='weight')
    sent_sorted = sorted(sent_scores, key=sent_scores.get, reverse=True)
    summary = ""
    scount = 0
    # selects a number of the most salient sentences
    while sent_sorted:
        sent = sent_sorted.pop(0)
        scount += 1
        if 4 * (len(sent) + len(summary)) >= baseline:
            break
        if scount > len(pg):
            break
        summary += sent + ' '
    return summary
def pagerank(graph, records):
    """Reports on the highest (Page)Ranked individuals in the graph."""
    pr = nx.pagerank(graph)
    nodes = sorted(pr.items(), key=operator.itemgetter(1), reverse=True)[:records]
    print("Page Rank - top {} individuals".format(records))
    for n in nodes:
        print("  {:30}:\t{}".format(n[0], n[1]))
def pagerank_example():
    n = 7
    g = nx.wheel_graph(n)
    pos = nx.spring_layout(g)
    g = nx.DiGraph()
    g.add_nodes_from(range(0, n))
    g.add_edge(0, 1)
    g.add_edge(0, 6)
    g.add_edge(0, 5)
    g.add_edge(1, 2)
    g.add_edge(1, 6)
    g.add_edge(2, 0)
    g.add_edge(2, 1)
    g.add_edge(2, 3)
    g.add_edge(3, 4)
    g.add_edge(4, 5)
    g.add_edge(4, 6)
    g.add_edge(5, 0)
    g.add_edge(5, 3)
    g.add_edge(5, 4)
    ranks = nx.pagerank(g)
    for n in range(0, n):
        print 'node', n
        print '  rank:', ranks[n]
        print '  out edges:', g.neighbors(n)
        if g.neighbors(n):
            print '  per edge:', ranks[n] / len(g.neighbors(n))
        else:
            print '  per edge: null'
    draw_with_centrality(g, layout=pos)
def mmr_pagerank(document_list, len_sen_mat, threshold_t, max_word, mode):
    n = len(document_list)
    sim_matrix = build_sim_matrix(document_list, mode)
    g = nt.Graph()
    for i in range(n):
        for j in range(i + 1, n, 1):
            g.add_edge(i, j, distance_edge=sim_matrix[i, j])
    page_rank = nt.pagerank(g, weight="distance_edge")
    score = []
    for i in range(n):
        score.append(page_rank[i])
    summary = []
    threshold_t = np.average(sim_matrix[0, :])
    while (stopCondition(len_sen_mat, summary, max_word) == 0):
        s = np.argmax(score)
        score[s] = 0  # delete s from score
        if check_threshold_mmr_pagerank(sim_matrix, summary, s, threshold_t) == 1:
            summary.append(s)
    return summary
def centrality_scores(vote_matrix, season_graph):
    deg = nx.degree(season_graph)
    deg = {k: round(v, 1) for k, v in deg.iteritems()}
    close = nx.closeness_centrality(season_graph)
    close = {k: round(v, 3) for k, v in close.iteritems()}
    btw = nx.betweenness_centrality(season_graph)
    btw = {k: round(v, 3) for k, v in btw.iteritems()}
    eig = nx.eigenvector_centrality_numpy(season_graph)
    eig = {k: round(v, 3) for k, v in eig.iteritems()}
    page = nx.pagerank(season_graph)
    page = {k: round(v, 3) for k, v in page.iteritems()}
    # Add contestant placement (rank)
    order = list(vote_matrix.index)
    place_num = list(range(len(order)))
    place = {order[i]: i + 1 for i in place_num}
    names = season_graph.nodes()
    # Build a table with centralities
    table = [[name, deg[name], close[name], btw[name], eig[name], page[name], place[name]]
             for name in names]
    # Convert table to pandas df
    headers = ['name', 'deg', 'close', 'btw', 'eig', 'page', 'place']
    df = pd.DataFrame(table, columns=headers)
    df = df.sort_values(['page', 'eig', 'deg'], ascending=False)
    return df
def build(self, matrix, skim_depth=10):
    """
    Build graph, with PageRanks on nodes.

    :param matrix: A term matrix.
    :param skim_depth: The number of sibling edges.
    """
    # Register nodes and edges.
    for anchor in progress.bar(matrix.terms):
        n1 = matrix.text.unstem(anchor)
        # Heaviest pair scores:
        pairs = matrix.anchored_pairs(anchor).items()
        for term, weight in list(pairs)[:skim_depth]:
            n2 = matrix.text.unstem(term)
            self.graph.add_edge(n1, n2, weight=weight)
    # Compute PageRanks.
    ranks = nx.pagerank(self.graph)
    first = max(ranks.values())
    # Convert to 0->1 ratios.
    ranks = {k: v / first for k, v in ranks.items()}
    # Annotate the nodes.
    nx.set_node_attributes(self.graph, 'pagerank', ranks)
def build_diredge_and_pagerank():
    dirG = nx.DiGraph()
    # print post_author_dict
    # print post_subauthor_dict
    totalkey = post_author_dict.keys() + post_subauthor_dict.keys()
    for key in totalkey:
        if (key not in post_author_dict) or (key not in post_subauthor_dict):
            continue
        actor1_key = post_author_dict[key]
        actor1 = rev_author_dict[actor1_key]
        actor2_key_list = post_subauthor_dict[key]
        if len(actor2_key_list) == 1:
            continue
        for actor_key in actor2_key_list:
            if actor1_key == actor_key:
                continue
            actor2 = rev_author_dict[actor_key]
            # print actor1, actor2
            dirG.add_edge(actor1, actor2)
    pr = nx.pagerank(dirG)
    sort_pr = sorted(pr.keys(), key=lambda x: pr[x], reverse=True)
    print sort_pr[:10]
    return pr
def sort_sentences(sentences, words, sim_func=get_similarity, pagerank_config={'alpha': 0.85, }):
    """Sort sentences from most to least important.

    Keyword arguments:
    sentences        -- list of sentences
    words            -- 2-D list; each sub-list corresponds to a sentence in `sentences` and consists of its words
    sim_func         -- similarity function for two sentences, each given as a list of words
    pagerank_config  -- configuration passed to pagerank
    """
    sorted_sentences = []
    _source = words
    sentences_num = len(_source)
    graph = np.zeros((sentences_num, sentences_num))
    for x in xrange(sentences_num):
        for y in xrange(x, sentences_num):
            similarity = sim_func(_source[x], _source[y])
            graph[x, y] = similarity
            graph[y, x] = similarity
    nx_graph = nx.from_numpy_matrix(graph)
    scores = nx.pagerank(nx_graph, **pagerank_config)  # this is a dict
    sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    for index, score in sorted_scores:
        item = AttrDict(index=index, sentence=sentences[index], weight=score)
        sorted_sentences.append(item)
    return sorted_sentences
def pagerank_doc(abstr_path, file_name, file_names, omega, phi, ldamodel, corpus,
                 d=0.85, nfselect='027', num_topics=20, window=2):
    from utils import CiteTextRank
    from utils.tools import dict2list
    file_text = read_file(abstr_path, file_name)
    tagged_tokens = get_tagged_tokens(file_text)
    filtered_text = get_filtered_text(tagged_tokens)
    # edge_and_freq = get_edge_freq(filtered_text)
    # edge_features = add_lev_distance(edge_and_freq)  # edge_freq_lev
    # edge_weight = calc_edge_weight(edge_features, omega)
    if 'KDD' in abstr_path:
        dataset = 'kdd'
    else:
        dataset = 'www'
    cite_edge_weight = CiteTextRank.sum_weight(file_name, doc_lmdt=omega[0], citing_lmdt=omega[1],
                                               cited_lmdt=omega[2], dataset=dataset, window=window)
    # print(cite_edge_weight)
    edge_weight = dict2list(cite_edge_weight)
    # print(edge_weight)
    graph = build_graph(edge_weight)
    node_list = list(graph.node)
    if 'KDD' in abstr_path:
        raw_node_features = read_file('./data/', 'KDD_node_features')
    else:
        raw_node_features = read_file('./data/', 'WWW_node_features')
    node_features = read_node_features(node_list, raw_node_features, file_name, nfselect=nfselect)
    node_weight = calc_node_weight(node_features, phi)
    # word_prob = get_word_prob(file_name, file_names, node_list, ldamodel, corpus, num_topics=num_topics)
    node_weight_topic = {}
    for node in node_list:
        node_weight_topic[node] = node_weight[node]  # * word_prob[node]
    pr = nx.pagerank(graph, alpha=d, personalization=node_weight_topic)
    return pr, graph
def textrank(sentences):
    bow_matrix = CountVectorizer().fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(bow_matrix)
    similarity_graph = normalized * normalized.T
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
    scores = nx.pagerank(nx_graph)
    return sorted(((scores[i], i, s) for i, s in enumerate(sentences)), reverse=True)
def main():
    preprocess.main()
    nodes = []
    sentences = []
    with open('sentences.txt') as f:
        while True:
            line = f.readline()
            if line == '\n' or line == '':
                break
            nodes.append(sentence_node(0, line.strip('\n')))
    print len(nodes)
    for x in range(len(nodes)):
        sentences.append(nodes[x].sentence)
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
    similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)
    G = nx.Graph()
    for x in range(len(nodes)):
        G.add_node(x)
    # G.add_nodes_from(nodes)
    for i in range(len(nodes)):
        for j in range(len(nodes)):
            if i < j and similarity_matrix[i][j] != 0:
                G.add_edge(i, j, weight=similarity_matrix[i][j])
    for i in range(len(nodes)):
        if len(G[i]) == 0:
            print "No out edges"
    pr = nx.pagerank(G, alpha=0.85)
    # print pr
    sorted_pr = sorted(pr.items(), key=operator.itemgetter(1), reverse=True)
    print sorted_pr[:10]
    for item in sorted_pr[:10]:
        print nodes[item[0]].sentence
def calculate_trust_features(users, test_users, trusts):
    """
    Calculates features related to statistics of user in the trust network.

    Args:
        users: dictionary of users.
        test_users: ids of users which are in test.
        trusts: nx.DiGraph object with trust network.

    Returns:
        None. Changes are made in users dictionary.
    """
    for user in trusts:
        if user not in users and user in test_users:
            # cold-start in test but in trust network
            users[user] = create_user(user)
    prank = pagerank(trusts)
    for user in users:
        if user not in trusts:
            users[user]['num_trustors'] = 0
            users[user]['num_trustees'] = 0
            users[user]['pagerank'] = 0
        else:
            users[user]['num_trustors'] = trusts.in_degree(user)
            users[user]['num_trustees'] = trusts.out_degree(user)
            users[user]['pagerank'] = prank[user]
def pagerank_hits():
    conn = sqlite3.connect("zhihu.db")
    # following_data = pd.read_sql('select user_url, followee_url from Following where followee_url in (select user_url from User where agree_num > 50000) and user_url in (select user_url from User where agree_num > 50000)', conn)
    following_data = pd.read_sql('select user_url, followee_url from Following where followee_url in (select user_url from User where agree_num > 10000) and user_url in (select user_url from User where agree_num > 10000)', conn)
    conn.close()
    G = nx.DiGraph()
    cnt = 0
    for d in following_data.iterrows():
        G.add_edge(d[1][0], d[1][1])
        cnt += 1
    print 'links number:', cnt
    pylab.figure(0)
    nx.draw_networkx(G)
    pylab.show()
    # PageRank
    pr = nx.pagerank(G)
    prsorted = sorted(pr.items(), key=lambda x: x[1], reverse=True)
    print 'pagerank top 100:\n'
    for p in prsorted[:100]:
        print p[0], p[1]
    # HITS
    hub, auth = nx.hits(G)
    print 'hub top 100:\n'
    for h in sorted(hub.items(), key=lambda x: x[1], reverse=True)[:100]:
        print h[0], h[1]
    print '\nauth top 100:\n'
    for a in sorted(auth.items(), key=lambda x: x[1], reverse=True)[:100]:
        print a[0], a[1]
for i in clean_sentences:
    if len(i) != 0:
        v = sum([word_embeddings.get(w, np.zeros((100,))) for w in i.split()]) / (len(i.split()) + 0.001)
    else:
        v = np.zeros((100,))
    sentence_vectors.append(v)

# Similarity Matrix
# creating similarity matrix
sim_mat = np.zeros([len(sentences), len(sentences)])

# initialize the matrix with cosine similarity scores
for i in range(len(sentences)):
    for j in range(len(sentences)):
        if i != j:
            sim_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1, 100),
                                              sentence_vectors[j].reshape(1, 100))[0, 0]

nx_graph = nx.from_numpy_array(sim_mat)
scores = nx.pagerank(nx_graph)

# Summary Extraction
ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

# Extract top 10 sentences as the summary
for i in range(10):
    print(ranked_sentences[i][1])
def hbase_test():
    # Default return value
    default_return = {'nodes': [], 'edges': []}
    # Input sanity checks
    search = request.args.get('search', '')
    if search == None or search == "":
        print "SD> WARN: Search query is empty"
        return default_return
    elif isinstance(search, str):
        if search.isdigit():
            search = int(search)
        else:
            print "SD> WARN: Search should be a digit"
            return default_return
    search_str = str(search)
    # Establish contact with database
    cluster = Cluster(contact_points=['54.219.144.56'], )
    session = cluster.connect('harary')
    # Look for node in database
    community_id = session.execute(
        "SELECT community FROM node_community_table WHERE source = " + search_str)
    if len(community_id) == 0:
        print "SD> WARN: Could not find node " + search_str + " in database"
        return default_return
    community_str = str(community_id[0].community)
    print "SD> INFO: Node " + search_str + " was found in database with community " + community_str
    # Search for community members
    print "SD> INFO: Executing query: " + "SELECT * FROM node_community_table WHERE community = " + community_str + " ALLOW FILTERING"
    result = session.execute(
        "SELECT * FROM node_community_table WHERE community = " + community_str + " ALLOW FILTERING;")
    print "SD> INFO: Query result: " + str(len(result)) + " members were found for community " + community_str
    # Empty result scenario
    if len(result) == 0:
        return default_return
    # Extreme cases are truncated for practicality
    max_number_of_nodes = 2000
    if len(result) > max_number_of_nodes:
        print "SD> WARN: Excessive number of node (%i). Something is probably wrong.." % len(result)
        result = result[0:max_number_of_nodes]
    node_index = 0
    edge_index = 0
    # Allocate the number of nodes
    expected_number_of_nodes = len(result)
    nodes = [{'id': '0', 'index': '0', 'label': '', 'community': 0,
              'x': 0, 'y': 0, 'size': 10} for k in range(expected_number_of_nodes)]
    # Sigma.js
    # edges = [{'id': '0', 'source': '0', 'target': '0'} for k in range(expected_number_of_nodes * expected_number_of_nodes)]
    # D3
    edges = [{'source': 100, 'target': 1000, 'id': 0}
             for k in range(expected_number_of_nodes * expected_number_of_nodes)]

    # Filter for visualization
    def filter(x):
        return len(x.target) < 50

    # Map ID to linear range for D3
    keys = [r.source for r in result if filter(r)]
    values = range(len(keys))
    dictionary = dict(zip(keys, values))
    # Add all nodes
    for node in result:
        if filter(node):
            nodes[node_index]['id'] = str(dictionary[node.source])
            nodes[node_index]['index'] = str(node.source)
            nodes[node_index]['community'] = node.community
            nodes[node_index]['label'] = "Node: " + str(node.source)
            nodes[node_index]['x'] = random.random()
            nodes[node_index]['y'] = random.random()
            node_index = node_index + 1
            if node.target != None:
                # Add all edges
                for target in node.target:
                    if target in keys:
                        edges[edge_index]['source'] = dictionary[node.source]
                        edges[edge_index]['target'] = dictionary[target]
                        edges[edge_index]['id'] = str(edge_index)
                        edge_index = edge_index + 1
    # Truncate excess
    nodes = nodes[0:node_index]
    edges = edges[0:edge_index]
    # Build graph from json
    G = json_graph.node_link_graph({'nodes': nodes, 'links': edges}, False, True)
    DiG = nx.DiGraph(G)
    G = nx.Graph(G)
    # On the fly computation of properties on manageable sizes
    bet_cen = nx.betweenness_centrality(G)
    clo_cen = nx.closeness_centrality(G)
    eig_cen = nx.eigenvector_centrality(G)
    pr = nx.pagerank(DiG, alpha=0.9)
    deg = G.degree()
    com = community.best_partition(G)
    for node in nodes:
        node['betweenness'] = bet_cen[node['id']]
        node['closeness'] = clo_cen[node['id']]
        node['eigenvector'] = eig_cen[node['id']]
        node['pagerank'] = pr[node['id']]
        node['degree'] = deg[node['id']]
        node['community'] = com[node['id']]
    # Return json string
    return json.dumps({'nodes': nodes, 'edges': edges})
def answer_six():
    # Your Code Here
    ranks = nx.pagerank(G2)
    ranks = sorted(ranks.items(), key=lambda x: x[1], reverse=True)
    return [item[0] for item in ranks[:5]]  # Your Answer Here
tic = time()
for file in ['karate', 'java']:
    # Constructs a graph of real network
    G = read(file)
    # Prints out statistics of real network
    info(G)
    # Computes node centralities of real network
    g = nx.Graph(G)
    prs = nx.pagerank(g)
    cs = nx.clustering(g)
    dcs = nx.degree_centrality(G)
    ccs = nx.closeness_centrality(G)
    bcs = nx.betweenness_centrality(G)
    # Finds community structure of real network
    c = 0
    comms = {}
    for comm in algorithms.louvain(G).communities:
        for i in comm:
            comms[i] = c
        c += 1
    # initialize a StorageClient. Set its connection timeout to 3000ms, retry times to 3
    storage_client = StorageClient(meta_client, 3000, 3)
    # initialize a ScanEdgeProcessor to process scanned edge data
    scan_edge_processor = ScanEdgeProcessor(meta_client)
    # initialize a ScanVertexProcessor to process scanned vertex data
    scan_vertex_processor = ScanVertexProcessor(meta_client)

    space_to_read = sys.argv[3]
    if space_to_read not in meta_client.get_parts_alloc_from_cache().keys():
        raise Exception('spaceToRead %s is not found in nebula graph' % space_to_read)

    # get argument return_cols, which is used in function scan_edge, scan_vertex, scan_part_edge, scan_part_vertex
    vertex_return_cols, edge_return_cols = get_return_cols(space_to_read)
    all_cols = True

    # initialize a Graph in NetworkX
    G = nx.Graph()
    # scan vertex data
    scan_vertex(space_to_read, vertex_return_cols, all_cols)
    # scan edge data
    scan_edge(space_to_read, edge_return_cols, all_cols)

    # print the pagerank value of each node in Graph G of NetworkX
    print('\npagerank value of each node in Graph G of NetworkX:')
    print(nx.pagerank(G))
except Exception as x:
    logging.exception(x)
def function(input):
    if input == 1:
        clustering_coefficient = nx.clustering(G, weight='weight')
        clustering_coefficient = normalise(clustering_coefficient)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(clustering_coefficient.keys())
        train_data, dev_data, test_data = write_train_test_dev(clustering_coefficient, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_clustering_coefficient.txt")
        write_to_file(dev_data, baseDir + "dev_clustering_coefficient.txt")
        write_to_file(test_data, baseDir + "test_clustering_coefficient.txt")
    if input == 2:
        betweenness_centrality = nx.betweenness_centrality(G, normalized=True, weight='weight')
        betweenness_centrality = normalise(betweenness_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(betweenness_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(betweenness_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_betweenness_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_betweenness_centrality.txt")
        write_to_file(test_data, baseDir + "test_betweenness_centrality.txt")
    if input == 3:
        closeness_centrality = {}
        set_of_nodes = G.nodes()
        random.shuffle(set_of_nodes)
        subset_of_nodes = set_of_nodes[:100000]
        for node in subset_of_nodes:
            closeness_centrality[node] = nx.closeness_centrality(G, node, normalized=True, distance='weight')
        # closeness_centrality = nx.closeness_centrality(G, normalized=True, distance='weight')
        closeness_centrality = normalise(closeness_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(closeness_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(closeness_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_closeness_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_closeness_centrality.txt")
        write_to_file(test_data, baseDir + "test_closeness_centrality.txt")
    if input == 4:
        average_neighbor_degree = nx.average_neighbor_degree(G, weight='weight')
        average_neighbor_degree = normalise(average_neighbor_degree)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(average_neighbor_degree.keys())
        train_data, dev_data, test_data = write_train_test_dev(average_neighbor_degree, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_average_neighbor_degree.txt")
        write_to_file(dev_data, baseDir + "dev_average_neighbor_degree.txt")
        write_to_file(test_data, baseDir + "test_average_neighbor_degree.txt")
    if input == 5:
        degree_centrality = nx.degree_centrality(G)
        degree_centrality = normalise(degree_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(degree_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(degree_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_degree_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_degree_centrality.txt")
        write_to_file(test_data, baseDir + "test_degree_centrality.txt")
    if input == 6:
        load_centrality = nx.load_centrality(G, normalized=True, weight='weight')
        load_centrality = normalise(load_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(load_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(load_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_load_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_load_centrality.txt")
        write_to_file(test_data, baseDir + "test_load_centrality.txt")
    if input == 7:
        shortest_path_length = {}
        nodes_in_graph = G.nodes()
        random.shuffle(nodes_in_graph)
        subset_of_nodes = nodes_in_graph[:200000]
        for i in range(len(subset_of_nodes) - 1):
            key_1 = subset_of_nodes[i]
            key_2 = subset_of_nodes[i + 1]
            try:
                shortest_path_length[str(key_1) + "\t" + str(key_2)] = nx.shortest_path_length(G, source=key_1, target=key_2)
            except:
                shortest_path_length[str(key_1) + "\t" + str(key_2)] = 0
        '''
        shortest_path_length_dict = nx.shortest_path_length(G, weight='weight')
        shortest_path_length = {}
        for key_1 in shortest_path_length_dict:
            for key_2 in shortest_path_length_dict[key_1]:
                shortest_path_length[str(key_1) + "\t" + str(key_2)] = shortest_path_length_dict[key_1][key_2]
        shortest_path_length = normalise(shortest_path_length)
        '''
        train_keys, dev_keys, test_keys = create_train_test_dev_split(shortest_path_length.keys())
        train_data, dev_data, test_data = write_train_test_dev(shortest_path_length, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_shortest_path_length.txt")
        write_to_file(dev_data, baseDir + "dev_shortest_path_length.txt")
        write_to_file(test_data, baseDir + "test_shortest_path_length.txt")
    # jaccard coefficient same for weighted and non-weighted
    if input == 9:
        katz_centrality = nx.katz_centrality(G, weight='weight', alpha=0.9, max_iter=100000)
        katz_centrality = normalise(katz_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(katz_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(katz_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_katz_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_katz_centrality.txt")
        write_to_file(test_data, baseDir + "test_katz_centrality.txt")
    if input == 10:
        pagerank = nx.pagerank(G, weight='weight')
        pagerank = normalise(pagerank)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(pagerank.keys())
        train_data, dev_data, test_data = write_train_test_dev(pagerank, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_pagerank.txt")
        write_to_file(dev_data, baseDir + "dev_pagerank.txt")
        write_to_file(test_data, baseDir + "test_pagerank.txt")
    # communicability same for weighted and non-weighted
    if input == 12:
        degree = G.degree(weight='weight')
        degree = normalise(degree)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(degree.keys())
        train_data, dev_data, test_data = write_train_test_dev(degree, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_degree.txt")
        write_to_file(dev_data, baseDir + "dev_degree.txt")
        write_to_file(test_data, baseDir + "test_degree.txt")
    if input == 13:
        edges_in_graph = G.edges()
        random.shuffle(edges_in_graph)
        subset_of_edges = edges_in_graph[:100000]
        jaccard_coefficient = nx.jaccard_coefficient(G, ebunch=subset_of_edges)
        jaccard_coefficient_dict = {}
        for u, v, p in jaccard_coefficient:
            jaccard_coefficient_dict[str(u) + "\t" + str(v)] = p
        jaccard_coefficient_dict = normalise(jaccard_coefficient_dict)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(jaccard_coefficient_dict.keys())
        train_data, dev_data, test_data = write_train_test_dev(jaccard_coefficient_dict, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_jaccard_coefficient_dict.txt")
        write_to_file(dev_data, baseDir + "dev_jaccard_coefficient_dict.txt")
        write_to_file(test_data, baseDir + "test_jaccard_coefficient_dict.txt")
keepbooking = {v: k for k, v in bookkeeping.items()}
graph = nx.DiGraph()
graph.add_nodes_from(bookkeeping.keys())

# Parse through HTML responses and gather up possible links
def gather_links(baseURLKey, html):
    links = []
    for link in BeautifulSoup(html, "lxml", parse_only=SoupStrainer('a')):
        if hasattr(link, "href"):
            url = urljoin(bookkeeping[baseURLKey], link.get('href'))
            # Only add into the graph if the link is bookmarked
            if url in keepbooking:
                links.append((baseURLKey, keepbooking[url]))
    return links

# Loop through all links and construct the directed graph
for k in bookkeeping:
    with open('/webpages_raw/' + k, encoding='utf-8') as rawData:
        links = gather_links(k, rawData.read())
        graph.add_edges_from(links)

# Calculate PageRank
calculated_page_rank = nx.pagerank(graph)
with open('pageranks', 'w') as outfile:
    json.dump(calculated_page_rank, outfile)
def answer_six():
    import operator
    pr = sorted(nx.pagerank(G2, alpha=0.85).items(), key=operator.itemgetter(1), reverse=True)
    return [i[0] for i in pr[0:5]]
def extractKeyphrases(text):
    # tokenize the text using nltk
    wordTokens = nltk.word_tokenize(text)
    # assign POS tags to the words in the text
    tagged = nltk.pos_tag(wordTokens)
    textlist = [x[0] for x in tagged]
    tagged = filter_for_tags(tagged)
    tagged = normalize(tagged)
    unique_word_set = unique_everseen([x[0] for x in tagged])
    word_set_list = list(unique_word_set)
    # this will be used to determine adjacent words in order to construct keyphrases with two words
    graph = buildGraph(word_set_list)
    # pageRank - initial value of 1.0, error tolerance of 0.0001
    calculated_page_rank = nx.pagerank(graph, weight='weight')
    # most important words in descending order of importance
    keyphrases = sorted(calculated_page_rank, key=calculated_page_rank.get, reverse=True)
    # the number of keyphrases returned will be relative to the size of the text (a third of the number of vertices)
    aThird = len(word_set_list) / 3
    keyphrases = keyphrases[0:aThird + 1]
    # take keyphrases with multiple words into consideration as done in the paper -
    # if two words are adjacent in the text and are selected as keywords, join them together
    modifiedKeyphrases = set([])
    # keeps track of individual keywords that have been joined to form a keyphrase
    dealtWith = set([])
    i = 0
    j = 1
    while j < len(textlist):
        firstWord = textlist[i]
        secondWord = textlist[j]
        if firstWord in keyphrases and secondWord in keyphrases:
            keyphrase = firstWord + ' ' + secondWord
            modifiedKeyphrases.add(keyphrase)
            dealtWith.add(firstWord)
            dealtWith.add(secondWord)
        else:
            if firstWord in keyphrases and firstWord not in dealtWith:
                modifiedKeyphrases.add(firstWord)
            # if this is the last word in the text, and it is a keyword,
            # it definitely has no chance of being a keyphrase at this point
            if j == len(textlist) - 1 and secondWord in keyphrases and secondWord not in dealtWith:
                modifiedKeyphrases.add(secondWord)
        i = i + 1
        j = j + 1
    return modifiedKeyphrases
def answer_six():
    # Your Code Here
    pr = nx.pagerank(G2, alpha=0.85)
    return sorted(pr.keys(), key=lambda key: pr[key], reverse=True)[:5]
def answer_five():
    # Your Code Here
    pr = nx.pagerank(G2, alpha=0.85)
    return pr['realclearpolitics.com']
import networkx as nx

PATH = "/Users/Nandhini/Documents/Courses/CSCI 572 - IR Fall 20/Assignment 4/solr-7.7.0/../LATIMES/latimes/"

g = nx.read_edgelist("edge_dist_new.txt", create_using=nx.DiGraph())
pagerank = nx.pagerank(g, alpha=0.85, personalization=None, max_iter=30, tol=1e-06,
                       nstart=None, weight='weight', dangling=None)
prs = set()
for file, pr in pagerank.items():
    prs.add(pr)
print("Max", max(prs))
print("Min", min(prs))
with open("pagerank_new.txt", "w") as pg_file:
    for file, pr in pagerank.items():
        pg_file.write(PATH + file + "=" + str(pr) + "\n")
deg_cc = pd.concat([deg, cc], axis=1)
deg_cc.columns = ("Degree", "CC")
deg_cc.groupby("Degree").mean().reset_index()\
    .plot(kind="scatter", x="Degree", y="CC", s=100)
plt.xscale("log")
plt.ylim(ymin=0)
plt.grid()
dzcnapy.plot("deg_cc")

# A study of centralities
dgr = nx.degree_centrality(G)
clo = nx.closeness_centrality(G)
har = nx.harmonic_centrality(G)
eig = nx.eigenvector_centrality(G)
bet = nx.betweenness_centrality(G)
pgr = nx.pagerank(G)
hits = nx.hits(G)

centralities = pd.concat(
    [pd.Series(c) for c in (hits[1], eig, pgr, har, clo, hits[0], dgr, bet)],
    axis=1)
centralities.columns = ("Authorities", "Eigenvector", "PageRank",
                        "Harmonic Closeness", "Closeness", "Hubs",
                        "Degree", "Betweenness")
centralities["Harmonic Closeness"] /= centralities.shape[0]

# Calculate the correlations for each pair of centralities
c_df = centralities.corr()
ll_triangle = np.tri(c_df.shape[0], k=-1)
c_df *= ll_triangle
import networkx
import operator

f = open('user_retweet_list.txt', 'r')
G = networkx.Graph()
for line in f:
    splits = line.split()
    G.add_edge(splits[0], splits[1])

pr = networkx.pagerank(G)
print pr
print sorted(pr.items(), key=operator.itemgetter(1), reverse=True)[:10]
              (math.sqrt(len(set(train_graph.successors(a))) * len((set(train_graph.successors(b))))))
        return sim
    except:
        return 0

def cosine_for_followers(a, b):
    try:
        if len(set(train_graph.predecessors(a))) == 0 | len(set(train_graph.predecessors(b))) == 0:
            return 0
        sim = (len(set(train_graph.predecessors(a)).intersection(set(train_graph.predecessors(b))))) /\
              (math.sqrt(len(set(train_graph.predecessors(a)))) * (len(set(train_graph.predecessors(b)))))
        return sim
    except:
        return 0

pr = nx.pagerank(train_graph, alpha=0.85)
# pickle.dump(pr, open('data/page_rank.p', 'wb'))
mean_pr = float(sum(pr.values())) / len(pr)

def compute_shortest_path_length(a, b):
    p = -1
    try:
        if train_graph.has_edge(a, b):
            train_graph.remove_edge(a, b)
            p = nx.shortest_path_length(train_graph, source=a, target=b)
            train_graph.add_edge(a, b)
        else:
            p = nx.shortest_path_length(train_graph, source=a, target=b)
        return p
    except:
def Page_Rank(G):
    PageRank_Centrality = nx.pagerank(G, alpha=0.85)
    # print "PageRank_Centrality:", sorted(PageRank_Centrality.iteritems(), key=lambda d: d[1], reverse=True)
    return PageRank_Centrality
def calculatePageRank():
    pr = nx.pagerank(G, weight='weight')
    return pr
def create(request):
    if request.method == 'POST':
        data = request.POST['parag']
        paragraph = data
        text = data.replace('\n', '')
        data = text
        for k in text.split("\n"):
            text2 = re.sub(r"[^a-zA-Z0-9&]+", ' ', k)
            text = text2
        tokens = [t for t in text.split()]
        sr = stopwords.words('english')
        clean_tokens = tokens[:]
        for token in tokens:
            if token in stopwords.words('english'):
                clean_tokens.remove(token)
        freq = nltk.FreqDist(clean_tokens)
        s = [(k, freq[k]) for k in sorted(freq, key=freq.get, reverse=True)]
        title = s[0][0]
        search_queries = [
            sorted(freq.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)[0][0]
            + " "
            + sorted(freq.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)[1][0]
        ]
        for query in search_queries:
            downloadimages(query, title)
        stop_words = stopwords.words('english')
        summarize_text = []
        # Step 1 - Read text and split it
        article = data.split(". ")
        sentences = []
        sentences_list = ''
        count_sentence = 0
        for sentence in article:
            count_sentence = count_sentence + 1
            sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
        sentences.pop()
        top_n = int(count_sentence / 3)
        # Step 2 - Generate similarity matrix across sentences
        sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)
        # Step 3 - Rank sentences in similarity matrix
        sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
        scores = nx.pagerank(sentence_similarity_graph)
        # Step 4 - Sort the rank and pick top sentences
        ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
        for i in range(top_n):
            summarize_text.append(" ".join(ranked_sentence[i][1]))
        # Step 5 - Of course, output the summarized text
        m = 1
        # Driver Code
        with open("visualizer/input/op.tsv", "w") as text_file:
            text_file.write("content" + "\t" + "val" + '\n')
            for i in summarize_text:
                sentences_list = sentences_list + i
                search_queries.append(i)
                text_file.write(i + "\t" + str(m) + '\n')
                m = m + 1
        emotion = predict()
        for query in search_queries:
            review = re.sub('[^a-zA-Z]', ' ', query)
            review = review.lower()
            review = review.split()
            ps = PorterStemmer()
            review = [ps.stem(word) for word in review
                      if not word in set(stopwords.words('english'))]
            review = ' '.join(review)
            downloadimages(review, title)
        fps = 0.2
        file_list = glob.glob('visualizer/images/' + title + '/*.jpg')  # Get all the images in the current directory
        file_list_sorted = natsorted(file_list, reverse=False)  # Sort the images
        clips = [ImageClip(m).set_duration(5) for m in file_list_sorted]
        concat_clip = concatenate(clips, method="compose")
        concat_clip.write_videofile("visualizer/output/project.mp4", fps=fps)
        folder = 'visualizer/images/' + title + '/'
        for the_file in os.listdir(folder):
            file_path = os.path.join(folder, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
                # elif os.path.isdir(file_path): shutil.rmtree(file_path)
            except Exception as e:
                print(e)
        textClip = gTTS(text=sentences_list, lang=language, slow=False)
        textClip.save("visualizer/output/voice.mp3")
        audioclip = AudioFileClip("visualizer/output/voice.mp3")
        my_clip = VideoFileClip('visualizer/output/project.mp4')
        audio_background = AudioFileClip('visualizer/emotions/' + emotion + '.mp3')
        new_audioclip = CompositeAudioClip([audio_background.volumex(0.08), audioclip.volumex(1)])
        final_audio = CompositeAudioClip([new_audioclip])
        audio = afx.audio_loop(final_audio, duration=audioclip.duration)
        final_clip = my_clip.set_audio(audio)
        final_clip.write_videofile("visualizer/output/" + title + '.mp4')
        data = title
        file_path = 'visualizer/output/' + data + '.mp4'
        video = Video()
        video.data = paragraph
        video.name = data
        video.videofile = file_path
        video.save()
        return redirect(video.videofile.url)
    if request.method == 'GET':
        return render(request, 'index.html')
def get_graph_metrics(connectivity_vector):
    # reshape into matrix
    connectivity_matrix = np.reshape(connectivity_vector, (90, 90))
    # convert to networkx graph
    connectivity_graph = nwx.from_numpy_matrix(connectivity_matrix)
    # convert to distance graph as some metrics need this instead
    distance_matrix = connectivity_matrix
    distance_matrix[distance_matrix == 0] = np.finfo(np.float32).eps
    distance_matrix = 1.0 / distance_matrix
    distance_graph = nwx.from_numpy_matrix(distance_matrix)
    # initialise vector of metrics
    metrics = np.zeros((21,))
    # fill the vector of metrics
    # 1 and 2: degree distribution
    degrees = np.sum(connectivity_matrix, axis=1)
    metrics[0] = np.mean(degrees)
    metrics[1] = np.std(degrees)
    # 3 and 4: weight distribution
    weights = np.tril(connectivity_matrix, k=-1)
    metrics[2] = np.mean(weights)
    metrics[3] = np.std(weights)
    # 5: average shortest path length
    # transform weights to distances so this makes sense
    metrics[4] = nwx.average_shortest_path_length(distance_graph, weight='weight')
    # 6: assortativity
    metrics[5] = nwx.degree_assortativity_coefficient(connectivity_graph, weight='None')
    # 7: clustering coefficient
    metrics[6] = nwx.average_clustering(connectivity_graph, weight='weight')
    # 8: transitivity
    metrics[7] = nwx.transitivity(connectivity_graph)
    # 9 & 10: local and global efficiency
    metrics[8] = np.mean(bct.efficiency_wei(connectivity_matrix, local=True))
    metrics[9] = bct.efficiency_wei(connectivity_matrix, local=False)
    # 11: clustering coefficient
    metrics[10] = np.mean(nwx.clustering(connectivity_graph, weight='weight').values())
    # 12 & 13: betweenness centrality
    metrics[11] = np.mean(nwx.betweenness_centrality(distance_graph, weight='weight').values())
    metrics[12] = np.mean(nwx.current_flow_betweenness_centrality(distance_graph, weight='weight').values())
    # 14: eigenvector centrality
    metrics[13] = np.mean(nwx.eigenvector_centrality(distance_graph, weight='weight').values())
    # 15: closeness centrality
    metrics[14] = np.mean(nwx.closeness_centrality(distance_graph, distance='weight').values())
    # 16: PageRank
    metrics[15] = np.mean(nwx.pagerank(connectivity_graph, weight='weight').values())
    # 17: rich club coefficient
    metrics[16] = np.mean(nwx.rich_club_coefficient(connectivity_graph).values())
    # 18: density
    metrics[17] = bct.density_und(connectivity_matrix)[0]
    # 19, 20, 21: eccentricity, radius, diameter
    spl_all = nwx.shortest_path_length(distance_graph, weight='weight')
    eccs = np.zeros(90,)
    for i in range(90):
        eccs[i] = np.max(spl_all[i].values())
    metrics[18] = np.mean(eccs)
    metrics[19] = np.min(eccs)
    metrics[20] = np.max(eccs)
    return metrics
    sentences        -- list of sentences
    words            -- 2-D list; each sub-list corresponds to a sentence in `sentences` and consists of its words
    sim_func         -- similarity function for two sentences, each given as a list of words
    pagerank_config  -- configuration passed to pagerank
    """
    sorted_sentences = []
    _source = words
    sentences_num = len(_source)
    graph = np.zeros((sentences_num, sentences_num))
    for x in xrange(sentences_num):
        for y in xrange(x, sentences_num):
            # _source[x] is the tokenized sentence
            similarity = sim_func(_source[x], _source[y])
            # undirected (bidirectional) weighted graph; the weight is the similarity between the two sentences
            graph[x, y] = similarity
            graph[y, x] = similarity
    nx_graph = nx.from_numpy_matrix(graph)
    # run the pagerank algorithm
    scores = nx.pagerank(nx_graph, **pagerank_config)  # this is a dict
    sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    for index, score in sorted_scores:
        item = AttrDict(index=index, sentence=sentences[index], weight=score)
        sorted_sentences.append(item)
    return sorted_sentences


if __name__ == '__main__':
    pass
def page_rank(n, page_graph):
    ranks = pagerank(page_graph, alpha=0.6)
    summary = sorted(ranks.keys(), key=lambda k: ranks[k], reverse=True)[:n]
    return summary, ranks
def add_pagerank(self, graph):
    pr = nx.pagerank(graph)
    nx.set_node_attributes(graph, 'zpagerank', pr)
        vtotnot += len(vnot[com])
        vtot0 += len(vpos0[com]) - len(vneg0[com])
        vtot1 += sum(vpos1[com]) - sum(vneg1[com])
        vtot2 += sum(vpos2[com]) - sum(vneg2[com])
        tv[com] = sum(vpos2[com]) - sum(vneg2[com])
        Gp[com].add_edges_from(vpos0[com])
        Gprp.add_edges_from(vpos0[com])
        Gn[com].add_edges_from(vneg0[com])
        Gprn.add_edges_from(vneg0[com])
        GpT = nx.DiGraph.reverse(Gp[com], copy=True)
        GnT = nx.DiGraph.reverse(Gn[com], copy=True)
        nx.set_node_attributes(GpT, False, 'visited')
        nx.set_edge_attributes(GpT, False, 'visited')
        nx.set_node_attributes(GnT, False, 'visited')
        nx.set_edge_attributes(GnT, False, 'visited')
        prp = nx.pagerank(GpT)
        prn = nx.pagerank(GnT)
        pr[i] += prp[i] - prn[i]
        wpp = PageRank(GpT, 0.85)
        wpn = PageRank(GnT, 0.85)
        wp[i] += wpp[i] - wpn[i]
    sh[i] = vtotnot
    un[i] = vtot0
    tw[i] = vtot1
    di[i] = vtot2
    print(
        "NODE {}, color {}, vnot {}, v0node {}, v1node {}, v2node {} v3node {}"
        .format(i, G.node[i], vtotnot, vtot0, vtot1, vtot2, pr[i]))
def cleanGraph(Gr):
    # takes gexf file
    # output: better indexed network, dict of node labels to easier indices
    inds = {}
    for i in list(Gr.nodes):
        inds[i] = Gr.nodes[i]
    Gr = nx.convert_node_labels_to_integers(Gr)
    return Gr, inds


day1 = nx.read_gexf("data/sp_data_school_day_1_g.gexf_")
G, inds = cleanGraph(day1)
# G = nx.gnp_random_graph(600, 0.1)
pagerank = nx.pagerank(G)
bet = nx.betweenness_centrality(G)
close = nx.closeness_centrality(G)


def getMaxMinMid(centralities):
    # returns node label for max, min, mid
    import operator
    length = len(centralities)
    nodelist = list(sorted(centralities.items(), key=operator.itemgetter(1)))
    mmax = nodelist[length - 1][0]
    mmin = nodelist[0][0]
    mmid = nodelist[int(length / 2)][0]
    return mmax, mmin, mmid
import pandas as pd
import numpy as np
import networkx as nx
import community

train = pd.read_table('./Data/training_set.txt', sep=' ', names=['source', 'target', 'link'])
G = nx.Graph()
train_link = train[train.link == 1][['source', 'target']]
G.add_edges_from(train_link.values)

id_degree_dict = G.degree()
id_bc_dict = nx.betweenness_centrality(G)
id_cluster_dict = nx.clustering(G)
id_pagerank_dict = nx.pagerank(G)


def Save_Obj(Obj, File_Name):
    import pickle
    File = File_Name + '.pkl'
    output = open(File, 'wb')
    pickle.dump(Obj, output)
    output.close()


Save_Obj(id_degree_dict, './Data/id_degree_dict')
Save_Obj(id_bc_dict, './Data/id_bc_dict')
Save_Obj(id_cluster_dict, './Data/id_cluster_dict')
Save_Obj(id_pagerank_dict, './Data/id_pagerank_dict')

# community
def page_rank(graph):
    return nx.pagerank(graph)
def answer_five():
    # Your Code Here
    ranks = nx.pagerank(G2)
    return ranks['realclearpolitics.com']  # Your Answer Here
for w1 in sent1:
    for w2 in sent2:
        if w1 == w2:
            # print(w1)
            if w1 in word_scores:
                print(w1, "-->", i, j)
                weight = weight + word_scores[w1]
                print(weight)
w = weight
G.add_edge(temp1, temp2, weight=w)

nx.draw(G)
plt.show()

sent_scores = nx.pagerank(G, 0.85)
print(sent_scores)

print("\n\n\n final scores \n\n\n")
s = [(k, sent_scores[k]) for k in sorted(sent_scores, key=sent_scores.get, reverse=True)]
for k, sent_scores[k] in s:
    v = sent_scores[k]
    print(k, v)

print("\n\n\n final summary \n\n\n")
size = minimum(10, len(s))
G.add_edge(18, 100)
G.add_edge(19, 100)
G.add_edge(20, 100)
G.add_edge(22, 100)
G.add_edge(25, 100)
G.add_edge(27, 100)
G.add_edge(34, 100)
G.add_edge(13, 1)
G.add_edge(13, 15)
G.add_edge(13, 17)
G.add_edge(13, 18)
G.add_edge(13, 19)
G.add_edge(13, 20)
G.add_edge(18, 1)
G.add_edge(18, 13)
G.add_edge(18, 17)
G.add_edge(18, 19)
G.add_edge(18, 20)
G.add_edge(19, 18)
G.add_edge(22, 15)

print 'Degree: '
print G.degree(G.nodes())
print 'PG:'
print nx.pagerank(G)

pos = nx.circular_layout(G, dim=50, scale=100)
plt.clf()
nx.draw(G, with_labels=True)
plt.savefig('C:/Users/Branko/Desktop/VPJXQGSRWJZDOB-UHFFFAOYSA-O_oriented.png')
def rank(self):
    return networkx.pagerank(self.graph, weight='weight')