def test_numpy_pagerank(self):
    G = self.G
    p = networkx.pagerank_numpy(G, alpha=0.9)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    personalize = dict((n, random.random()) for n in G)
    p = networkx.pagerank_numpy(G, alpha=0.9, personalization=personalize)
def personal_page_rank(self, p_vector, reverse=False):
    '''
    Personal_Page_Rank: Get the personal pagerank of the supplied input vector

    Input:
        - p_vector: A hash-map of input values for a selection (or all) nodes
          (if supplied nodes aren't in the graph, they will be ignored)

    Output:
        - A vector of diffused heats in hash-map (key, value) format
    '''
    input_pvec = None
    if p_vector:
        input_pvec = {}
        epsilon = 0.0
        for node in self.G.nodes(data=False):
            if node in p_vector:
                input_pvec[node] = p_vector[node]
            else:
                input_pvec[node] = epsilon

    if reverse:
        return nx.pagerank_numpy(self.G_reversed, 0.85, input_pvec)
    else:
        return nx.pagerank_numpy(self.G, 0.85, input_pvec)
def personal_page_rank(self, p_vector, reverse=False):
    '''
    Personal_Page_Rank: Get the personal pagerank of the supplied input vector

    Input:
        - p_vector: A hash-map of input values for a selection (or all) nodes
          (if supplied nodes aren't in the graph, they will be ignored)

    Output:
        - A vector of diffused heats in hash-map (key, value) format
    '''
    # without initializing this vector the initial probabilities will be flat
    # and this will be equivalent to standard page rank
    input_pvec = None
    if p_vector:
        input_pvec = {}
        # doesn't seem to be necessary for a non-zero epsilon now, but
        # leave this as a place holder
        epsilon = 0.0
        for node in self.G.nodes(data=False):
            if node in p_vector:
                input_pvec[node] = p_vector[node]
            else:
                input_pvec[node] = epsilon

    if reverse:
        return nx.pagerank_numpy(self.G_reversed, 0.85, input_pvec)
    else:
        return nx.pagerank_numpy(self.G, 0.85, input_pvec)
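# --- Illustrative sketch (not from the example above): the personalization
# mechanism the method relies on, shown directly with pagerank_numpy.
# Assumes networkx < 3.0, where pagerank_numpy still exists; the graph and
# node names are made up. Heat placed on node "a" diffuses to its
# neighbourhood, which is exactly what input_pvec achieves internally.
import networkx as nx

G = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "a"), ("c", "d")])
p_vector = {n: 0.0 for n in G}   # epsilon for every node...
p_vector["a"] = 1.0              # ...and all of the heat on the seed node
heats = nx.pagerank_numpy(G, 0.85, p_vector)
print(sorted(heats.items(), key=lambda kv: -kv[1]))  # seed and its neighbours rank highest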
def main():
    disapprove, cooperate = build_graph(gdelt_data_iter())

    # Compute pagerank for the disapprove graph
    print("Computing pagerank for disapprove graph")
    pagerank1 = nx.pagerank_numpy(disapprove, alpha=0.90)
    print("Computing pagerank for cooperate graph")
    pagerank2 = nx.pagerank_numpy(cooperate, alpha=0.90)

    max1 = max(pagerank1.values())
    key1 = ''
    key2 = ''
    for key in pagerank1.keys():
        if pagerank1[key] == max1:
            key1 = key

    max2 = max(pagerank2.values())
    for key in pagerank2.keys():
        if pagerank2[key] == max2:
            key2 = key

    with open('results/disapprove_graph_page_rank.csv', 'w') as f1:
        f1.write(str(pagerank1))
    with open('results/cooperate_graph_page_rank.csv', 'w') as f2:
        f2.write(str(pagerank2))

    print("Maximum Page rank for disapprove graph is: %s %s" % (key1, max1))
    print("Maximum Page rank for cooperate graph is: %s %s" % (key2, max2))
def get_pagerank(G):
    p = nx.pagerank_numpy(G, alpha=0.9)
    for n in G:
        print(p[n])
    personalize = dict((n, np.random.random()) for n in G)
    p = nx.pagerank_numpy(G, alpha=0.9, personalization=personalize)
    return p
def test_numpy_pagerank(self):
    try:
        import numpy
    except ImportError:
        raise SkipTest('numpy not available.')
    G = self.G
    p = networkx.pagerank_numpy(G, alpha=0.9)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    personalize = dict((n, random.random()) for n in G)
    p = networkx.pagerank_numpy(G, alpha=0.9, personalization=personalize)
def calculate_pagerank(self):
    """
    Calculates PageRank for every node of graph.
    For directed graphs only.
    """
    if self.is_weighted:
        values = nx.pagerank_numpy(self.graph, weight='weight')
    else:
        values = nx.pagerank_numpy(self.graph, weight=None)
    # NetworkX 1.x argument order: set_node_attributes(G, name, values)
    nx.set_node_attributes(self.graph, 'pagerank', values)
def test_numpy_pagerank(self):
    try:
        import numpy
    except ImportError:
        raise SkipTest('numpy not available.')
    G = self.G
    p = networkx.pagerank_numpy(G, alpha=0.9)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    personalize = dict((n, random.random()) for n in G)
    p = networkx.pagerank_numpy(G, alpha=0.9, personalization=personalize)
def pagerank(graph, weighted=True):
    """
    Pagerank algorithm with beta = 0.85.
    If unweighted, then every outgoing edge is considered uniformly.
    Otherwise, outgoing edges are weighted by their given weights.

    Returns:
        An array where the ith element corresponds to the pagerank score
        of agent i in the trust graph.
    """
    # list() so the dict view converts to a numeric array under Python 3
    if weighted:
        return np.array(list(nx.pagerank_numpy(graph).values()))
    else:
        return np.array(list(nx.pagerank_numpy(graph, weight=None).values()))
def write_stats(g_true, g_pshrg, g_stergm, g_er, count):
    true_in = g_true.in_degree().values()
    true_out = g_true.out_degree().values()
    true_page = map(lambda x: round(x, 3), nx.pagerank_numpy(g_true).values())

    pshrg_in = g_pshrg.in_degree().values()
    pshrg_out = g_pshrg.out_degree().values()
    pshrg_page = map(lambda x: round(x, 3), nx.pagerank_numpy(g_pshrg).values())

    er_in = g_er.in_degree().values()
    er_out = g_er.out_degree().values()
    er_page = map(lambda x: round(x, 3), nx.pagerank_numpy(g_er).values())

    stergm_in = g_stergm.in_degree().values()
    stergm_out = g_stergm.out_degree().values()
    stergm_page = map(lambda x: round(x, 3), nx.pagerank_numpy(g_stergm).values())

    gcd_pshrg = PSHRG.GCD(g_pshrg, g_true)
    cdf_in_pshrg = PSHRG.cdf_sum(pshrg_in, true_in)
    cdf_out_pshrg = PSHRG.cdf_sum(pshrg_out, true_out)
    cdf_page_pshrg = PSHRG.cdf_sum(pshrg_page, true_page)

    gcd_er = PSHRG.GCD(g_er, g_true)
    cdf_in_er = PSHRG.cdf_sum(er_in, true_in)
    cdf_out_er = PSHRG.cdf_sum(er_out, true_out)
    cdf_page_er = PSHRG.cdf_sum(er_page, true_page)

    gcd_stergm = PSHRG.GCD(g_stergm, g_true)
    cdf_in_stergm = PSHRG.cdf_sum(stergm_in, true_in)
    cdf_out_stergm = PSHRG.cdf_sum(stergm_out, true_out)
    cdf_page_stergm = PSHRG.cdf_sum(stergm_page, true_page)

    with open('./final_dump/stats.csv', 'a') as f:
        csvwriter = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow([
            g_true.name, count,
            g_true.order(), g_true.size(),
            g_pshrg.order(), g_pshrg.size(),
            g_er.order(), g_er.size(),
            g_stergm.order(), g_stergm.size(),
            gcd_pshrg, cdf_in_pshrg, cdf_out_pshrg, cdf_page_pshrg,
            gcd_er, cdf_in_er, cdf_out_er, cdf_page_er,
            gcd_stergm, cdf_in_stergm, cdf_out_stergm, cdf_page_stergm
        ])
def getRandomPageRanks(filename):
    Ga = nx.read_graphml(filename)

    # create a copy of the graph and extract giant component
    # get component size distribution
    cc = nx.connected_components(Ga)
    cc_dict = {}
    for x in range(0, len(cc)):
        try:
            cc_dict[len(cc[x])].append(x)
        except KeyError:
            cc_dict[len(cc[x])] = []
            cc_dict[len(cc[x])].append(x)

    isolates = nx.isolates(Ga)

    rg = nx.fast_gnp_random_graph(
        Ga.number_of_nodes(),
        2.0 * Ga.number_of_edges() / (Ga.number_of_nodes() * (Ga.number_of_nodes() - 1)))
    c_rg = nx.average_clustering(rg)
    rg_cc = nx.connected_component_subgraphs(rg)[0]
    rg_asp = nx.algorithms.shortest_paths.generic.average_shortest_path_length(rg_cc)
    p_rg = community.best_partition(rg_cc)
    m_rg = community.modularity(p_rg, rg_cc)

    pageranks = nx.pagerank_numpy(rg)
    return pageranks
def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('spanish')
    summarize_text = []

    # Step 1 - Read the text and split it into sentences
    sentences = read_article(file_name)

    # Step 2 - Generate the similarity matrix across sentences
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in the similarity matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank_numpy(sentence_similarity_graph)

    # Step 4 - Sort the ranks and pick the top sentences
    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    # print("Indexes of top ranked_sentence order are ", ranked_sentence)

    for i in range(top_n):
        summarize_text.append(" ".join(ranked_sentence[i][1]))

    # Step 5 - Of course, output the summarized text
    # print(len(summarize_text))
    # print(" ", ". ".join(summarize_text))
    return summarize_text
def calculate_artifacts_centrality(pair_counts):
    """This uses PageRank to determine artifacts centrality."""
    g = nx.Graph()
    for pair in pair_counts:
        g.add_edge(pair[0], pair[1], weight=pair_counts[pair])
    pr = nx.pagerank_numpy(g)
    return pr
def OrigPagerank(self):
    '''
    returns a 2d array containing the pagerank of the origin node for all edges
    '''
    probas = np.dot(
        # list() so the dict view converts cleanly to a float array under Python 3
        np.array(list(nx.pagerank_numpy(self).values()), dtype=float).reshape(-1, 1),
        np.ones((1, self.number_of_nodes())))
    return probas
def calc_pagerank(self):
    """The method will calculate the PR scores for the entire set, with all the
    hyper parameters, and write the results to files"""
    for hyper_params, full_df in self.dict_all_options_stochastic.items():
        sim_func, lambda_param = (s.split('-')[1] for s in hyper_params.split('+'))
        print(
            f'Working on the combination: \n'
            f'Similarity: {sim_func} lambda: {lambda_param} predictor: {self.predictor}'
        )
        stime = Timer('PageRank Calculations')
        for pred_score in self.prediction_scores:
            _score_list = []
            for topic, _df in full_df[pred_score].groupby('topic'):
                df = pd.DataFrame(_df)
                df = df.reset_index().drop('topic', axis=1).pivot(index='src', columns='dest')
                df.columns = df.columns.droplevel(0)
                graph = nx.from_pandas_adjacency(df, nx.DiGraph)
                pr_dict = nx.pagerank_numpy(graph, alpha=1)
                _score_list.append(pd.Series(pr_dict))
            pr_sr = pd.concat(_score_list)
            self._write_results(pr_sr, sim_func, pred_score.split('_')[1], lambda_param)
        stime.stop()
def attributes(G):
    # elen={}
    # for e in G.edges:

    ## Centrality metrics
    G_clustering = nx.clustering(G)
    G_deg = nx.degree_centrality(G)
    G_degree = nx.degree(G)
    # G_bet = nx.betweenness_centrality(G)
    G_eig = nx.eigenvector_centrality_numpy(G)
    G_page = nx.pagerank_numpy(G)
    # G_load = nx.load_centrality(G)
    G_katz = nx.katz_centrality_numpy(G)
    G_closeness = nx.closeness_centrality(G)  # aka node strength https://arxiv.org/pdf/0803.3884.pdf
    # closeness
    # print(G_closeness)

    Centrality_metric = {"Degree_centrality": G_deg, "Eigencentrality": G_eig, "katz": G_katz,
                         "Pagerank": G_page, "Closeness": G_closeness, "Clustering": G_clustering}
    # Centrality_metric = {"Degree_centrality": G_deg, "Clustering": G_clustering}

    for cent in Centrality_metric:
        nx.set_node_attributes(G, name=cent, values=Centrality_metric[cent])

    d = {key: value for (key, value) in G_degree}
    nx.set_node_attributes(G, name="Degree", values=d)

    # CBN1.0 Centrality by node
    G = cbn2(G)
    G = cbn3(G)

    # return G, G_page, G_katz, G_closeness
    return G
def out_page_rank(
    self,
    horizon: int,
    table_name: str = "fevd",
    normalize: bool = False,
) -> np.ndarray:
    """Calculate the page rank of outgoing links per node (column-wise).

    Args:
        table_name: Abbreviated name of the table.
        horizon: Number of periods to compute the table.
        normalize: Indicates whether the table should be row-normalized.

    Returns:
        out_rank: A (n_series * 1) vector with centrality values.
    """
    graph = self.to_graph(
        table_name=table_name,
        horizon=horizon,
        normalize=normalize,
    ).reverse()
    try:
        out_rank = nx.pagerank_numpy(graph, weight="weight", alpha=0.85)
    except PowerIterationFailedConvergence:
        warnings.warn("out-page-rank calculation did not converge")
        out_rank = {node: np.nan for node in list(graph.nodes)}
    out_rank = np.fromiter(out_rank.values(), dtype=float).reshape(-1, 1)
    return out_rank
def summary(file_name):
    print("in summary")
    top_n = 5
    stop_words = stopwords.words('english')
    summarized_text = []
    file_name_edit = file_name.split(".")
    sentences = []
    for sentence in file_name_edit:
        # print(sentence)
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    print(sentences)

    # Calling the respective functions
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)

    # Ranking the sentences by building a graph using the "Networkx" library
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    rank = nx.pagerank_numpy(sentence_similarity_graph)

    # Sorting the ranked sentences
    ranked_sentence = sorted(((rank[i], s) for i, s in enumerate(sentences)))
    # print("Indexes of top ranked_sentence order are ", ranked_sentence)

    for j in range(top_n):
        summarized_text.append(" ".join(ranked_sentence[j][1]))
    print("Summarized Text: \n", ". ".join(summarized_text))
def set_capacities_pagerank_gravity(topology, capacities, capacity_unit='Mbps',
                                    alpha=0.85, weight=None):
    """
    Set link capacities proportionally to the product of the Pagerank
    centralities of the two end-points of the link

    Parameters
    ----------
    topology : Topology
        The topology to which link capacities will be set
    capacities : list
        A list of all possible capacity values
    capacity_unit : str, optional
        The unit in which capacity value is expressed (e.g. Mbps, Gbps etc..)
    alpha : float, optional
        The alpha parameter of the PageRank algorithm
    weight : str, optional
        The name of the link attribute to use for the PageRank algorithm.
        Valid attributes include *capacity* *delay* and *weight*.
        If ``None``, all links are assigned the same weight.
    """
    centrality = nx.pagerank_numpy(topology, alpha=alpha, personalization=None,
                                   weight=weight)
    _set_capacities_gravity(topology, capacities, centrality, capacity_unit)
def draw_graph(nodes, edges, graphs_dir, default_lang='all'):
    lang_graph = nx.MultiDiGraph()
    lang_graph.add_nodes_from(nodes)
    for edge in edges:
        if edges[edge] == 0:
            lang_graph.add_edge(edge[0], edge[1])
        else:
            lang_graph.add_edge(edge[0], edge[1], weight=float(edges[edge]),
                                label=str(edges[edge]))

    # print graph info to stdout
    # degree centrality
    print('-----------------\n\n')
    print(default_lang)
    print(nx.info(lang_graph))
    try:
        # When ties are associated to some positive aspects such as friendship or collaboration,
        # indegree is often interpreted as a form of popularity, and outdegree as gregariousness.
        DC = nx.degree_centrality(lang_graph)
        max_dc = max(DC.values())
        max_dc_list = [item for item in DC.items() if item[1] == max_dc]
    except ZeroDivisionError:
        max_dc_list = []
    # https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%81%D0%BD%D1%8B%D0%B5_%D1%81%D0%B5%D1%82%D0%B8
    print('maxdc', str(max_dc_list), sep=': ')
    # assortativity coefficient
    AC = nx.degree_assortativity_coefficient(lang_graph)
    print('AC', str(AC), sep=': ')
    # connectivity
    print("Weakly connected graph: ", nx.is_weakly_connected(lang_graph))
    print("number of weakly connected components: ", nx.number_weakly_connected_components(lang_graph))
    print("Strongly connected graph: ", nx.is_strongly_connected(lang_graph))
    print("number of strongly connected components: ", nx.number_strongly_connected_components(lang_graph))
    print("attracting components: ", nx.number_attracting_components(lang_graph))
    print("node connectivity: ", nx.node_connectivity(lang_graph))
    print("edge connectivity: ", nx.edge_connectivity(lang_graph))
    # other info
    print("average degree connectivity: ", nx.average_degree_connectivity(lang_graph))
    print("average neighbor degree: ",
          sorted(nx.average_neighbor_degree(lang_graph).items(), key=itemgetter(1), reverse=True))
    # best for small graphs, and our graphs are pretty small
    print("pagerank: ", sorted(nx.pagerank_numpy(lang_graph).items(), key=itemgetter(1), reverse=True))

    plt.figure(figsize=(16.0, 9.0), dpi=80)
    plt.axis('off')
    pos = graphviz_layout(lang_graph)
    nx.draw_networkx_edges(lang_graph, pos, alpha=0.5, arrows=True)
    nx.draw_networkx(lang_graph, pos, node_size=1000, font_size=12, with_labels=True, node_color='green')
    nx.draw_networkx_edge_labels(lang_graph, pos, edges)

    # saving file to draw it with dot-graphviz
    # changing overall graph view, default is top-bottom
    lang_graph.graph['graph'] = {'rankdir': 'LR'}
    # marking with blue the nodes with maximum degree centrality
    for max_dc_node in max_dc_list:
        lang_graph.node[max_dc_node[0]]['fontcolor'] = 'blue'
    write_dot(lang_graph, os.path.join(graphs_dir, default_lang + '_links.dot'))

    # plt.show()
    plt.savefig(os.path.join(graphs_dir, 'python_' + default_lang + '_graph.png'), dpi=100)
    plt.close()
def initialized_pagerank(self, g):
    """ Returns nodes scored by pagerank.

    :param g: a directed networkx graph
    :return: a sorted list of tuples of (node, score)

    Note: Pagerank is modified to always jump to an action node.
          All action nodes are jumped to equally.
    """
    # Get actions
    actions = set()
    for node in g.nodes():
        if g.node[node]['type'] == 'action':
            actions.add(node)

    # create actions as 'jump points' with even probability
    # actions = {a: 1/float(len(actions)) for a in actions}
    dangling = dict()
    for node in g.nodes():
        dangling[node] = 0
    for action in actions:
        dangling[action] = 1 / float(len(actions))

    # do the actual scoring
    scores = nx.pagerank_numpy(g, weight='weight', dangling=dangling)
    scores = [(k, v) for k, v in scores.items()]  # convert from dictionary so it can be sorted
    scores.sort(key=itemgetter(1), reverse=True)  # Sort the scores

    return scores
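# --- Illustrative sketch (not from the example above) of the `dangling`
# argument the method uses: rank flowing out of nodes with no outgoing edges
# is redistributed according to the supplied distribution instead of
# uniformly. Assumes networkx < 3.0; the toy graph and node names are made up.
import networkx as nx

g = nx.DiGraph([("action1", "sink"), ("action2", "sink")])
# "sink" has no out-edges; send its outflow only to the action nodes.
dangling = {"action1": 0.5, "action2": 0.5, "sink": 0.0}
scores = nx.pagerank_numpy(g, weight="weight", dangling=dangling)
print(sorted(scores.items(), key=lambda kv: kv[1], reverse=True))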
def topology(G):
    ## degree
    G_degree = G.degree()
    ## clustering
    G_clustering = nx.clustering(G)
    ## average_clustering
    G_average_clustering = nx.average_clustering(G)
    ## diameter
    # G_diameter = nx.diameter(G)
    #### average_shortest_path_length
    # G_average_shortest_path_length = nx.average_shortest_path_length(G)
    ## degree_centrality (The degree centrality for a node v is the fraction of nodes it is connected to)
    # G_degree_centrality = nx.degree_centrality(G)
    ## closeness_centrality (average shortest path distance to u over all n-1 reachable nodes)
    G_closeness_centrality = nx.closeness_centrality(G)
    ## betweenness_centrality (Betweenness centrality of a node v is the sum of the fraction of all-pairs shortest paths that pass through v)
    G_betweenness_centrality = nx.betweenness_centrality(G)
    ## current_flow_closeness_centrality
    # G_current_flow_closeness_centrality = nx.current_flow_closeness_centrality(G)
    ## current_flow_betweenness_centrality
    # G_current_flow_betweenness_centrality = nx.current_flow_betweenness_centrality(G)
    ## eigenvector_centrality
    # G_eigenvector_centrality = nx.eigenvector_centrality(G)
    # G_pagerank = nx.pagerank(G)
    G_pagerank = nx.pagerank_numpy(G)
    return dict(G_degree), G_clustering, G_closeness_centrality, G_betweenness_centrality, G_pagerank
def features_dict(graph, anchors, use_dist=True, use_pgrs=True, use_pgr=True,
                  use_comm=False, use_comm_centr=False):
    node_feats = {}
    n = len(graph)
    if use_dist:
        # dists = nx.all_pairs_shortest_path_length(graph)
        dists = dists_to_anchors(graph, anchors)
    if use_pgr:
        pageranks = nx.pagerank_numpy(graph)
    if use_pgrs:
        # pgr_anchor = [anchored_pagerank(graph, anchor) for anchor in anchors]
        pgr_anchor = pageranks_to_anchors(graph, anchors)
    if use_comm_centr:
        communicability_centrality = nx.communicability_centrality(graph)
    if use_comm:
        communicability = nx.communicability(graph)

    for node in graph.nodes():
        feats = []
        if use_dist:
            feats += [dists[node][anchor] for anchor in anchors]
        if use_pgrs:
            feats += [pgr_anchor[anchor][node] * n for anchor in range(len(anchors))]
            # feats += [pgr[node]*n for pgr in pgr_anchor]
        if use_pgr:
            feats.append(pageranks[node] * n)
        if use_comm_centr:
            feats.append(communicability_centrality[node])
        if use_comm:
            feats += [communicability[node][anchor] for anchor in anchors]
        node_feats[node] = np.array(feats)
    return node_feats
def __calcCentrality(self, G, cnt):
    '''
    For calculating Graph centrality measures
    '''
    cntV = list()
    if cnt == 'deg':
        cntV = list(dict(G.degree).values())
    elif cnt == 'ei':
        cntV = list(nx.eigenvector_centrality_numpy(G).values())
    elif cnt == 'sh':
        cntV = list(nx.constraint(G).values())
    elif cnt == 'pr':
        cntV = list(nx.pagerank_numpy(G).values())
    elif cnt == 'bw':
        cntV = list(nx.betweenness_centrality(G).values())
    elif cnt == 'cl':
        cntV = list(nx.clustering(G).values())
    elif cnt == 'cc':
        cntV = list(nx.closeness_centrality(G).values())
    elif cnt == 'ec':
        cntV = list(nx.eccentricity(G).values())
    else:
        raise ValueError('calcCentrality: wrong cnt value or not implemented yet')
    return cntV
def apply_page_rank_algorithm(clean_sentences, sentences_paragraph, word_embeddings, sn):
    """
    Apply the page rank algorithm over the sentence graph to get the text summarization
    """
    sentences_summary = [
        x for i, x in enumerate(clean_sentences)
        if sentences_paragraph.get(i, -1) == 1
    ]

    sentences_summary_emb = []
    for i in sentences_summary:
        if len(i) != 0:
            v = sum([word_embeddings.get(w, np.zeros((100,))) for w in i.split()]) / (len(i.split()) + 0.001)
        else:
            v = np.zeros((100,))
        sentences_summary_emb.append(v)

    sim_mat = cosine_similarity(sentences_summary_emb)
    nx_graph = nx.from_numpy_array(sim_mat)
    try:
        scores = nx.pagerank(nx_graph)
    except:
        scores = nx.pagerank_numpy(nx_graph)

    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences_summary)), reverse=True)
    for i in range(sn):
        print('•', ranked_sentences[i][1], '\n')
def features_matrix(graph, anchors, use_dist=True, use_pgrs=True, use_pgr=True,
                    use_comm=False, use_comm_centr=False):
    node_feats = []
    n = len(graph)
    if use_dist:
        dists = nx.all_pairs_shortest_path_length(graph)
    if use_pgr:
        pageranks = nx.pagerank_numpy(graph)
    if use_pgrs:
        pgr_anchor = [anchored_pagerank(graph, anchor) for anchor in anchors]
    if use_comm_centr:
        communicability_centrality = nx.communicability_centrality(graph)
    if use_comm:
        communicability = nx.communicability(graph)

    for node in graph.nodes():
        assert node == len(node_feats)
        feats = []
        if use_dist:
            feats += [dists[node][anchor] for anchor in anchors]
        if use_pgrs:
            feats += [pgr[node] * n for pgr in pgr_anchor]
        if use_pgr:
            feats.append(pageranks[node] * n)
        if use_comm_centr:
            feats.append(communicability_centrality[node])
        if use_comm:
            feats += [communicability[node][anchor] for anchor in anchors]
        node_feats.append(np.array(feats))
    return node_feats
def scores(matrice_similarite, nx=nx, k=3):
    graph = nx.from_numpy_array(np.array(matrice_similarite))
    scores = nx.pagerank_numpy(graph)
    rank = sorted(scores.items(), key=lambda v: (v[1], v[0]), reverse=True)[:k]
    rank = [s[0] for s in rank]
    return rank
def make_fragment(mol_graph, mol_nodes, rings, branches, roots, backbone,
                  rigid_ring, rigid_branch, rigid_backbone, others):
    vertices = []
    nodes_list = list(mol_graph.nodes())
    if len(rings) == 0 and len(branches) == 0 and rigid_backbone == False:
        vertices = linear_fragment(backbone, others, mol_graph)
        # print(vertices)
    elif len(rings) == 0 and len(branches) == 0 and rigid_backbone == True:
        vertices = rigid_fragment(mol_graph, backbone)
        print(vertices)
    elif len(rings) != 0:
        if backbone == others or len(backbone) == 0:
            vertices = rings
            print(vertices)
        elif rigid_ring == True:
            vertices_ring = rings
            vertices_backbone = branch_fragment(backbone, roots, mol_graph)
            vertices = vertices_ring + vertices_backbone
            print(vertices)
    elif len(branches) != 0:
        if rigid_branch == False:
            for branch in branches:
                if len(branch) == 1:
                    vertices = simple_fragment(nodes_list)

    major_nd = []
    minor_nd = []
    for vertex in vertices:
        sub_graph = nx.Graph(mol_graph.subgraph(vertex))
        sub_centrality = nx.pagerank_numpy(sub_graph, alpha=0.85, weight='w')
        mjr_nd, mnr_nd = rank_nodes(sub_graph, sub_centrality, mol_graph)
        for m in mjr_nd:
            major_nd.append(m)
        for n in mnr_nd:
            minor_nd.append(n)
    return vertices, major_nd, minor_nd
def predict(sample, process, known_nodes=known_nodes):
    G = nx.from_numpy_array(process, create_using=nx.DiGraph())
    series = len(sample)
    labels = 1 * (sample > 0)
    up_known_labels = labels[:known_nodes]
    down_known_labels = np.logical_not(up_known_labels) * 1
    unknown_labels = np.zeros((series - known_nodes,))
    personal_vector = np.concatenate(
        (up_known_labels, unknown_labels, down_known_labels, unknown_labels))
    keys = list(range(0, series * 2))
    personalization = dict(zip(keys, personal_vector))
    ranks = nx.pagerank_numpy(G, personalization=personalization)
    ranks = list(ranks.values())
    up_unknown_ranks = np.array(ranks[known_nodes:series])
    down_unknown_ranks = np.array(ranks[series + known_nodes:])
    prediction = (up_unknown_ranks > down_unknown_ranks) * 1
    return prediction
def find_leader(g, alpha=0.85, weight=None):
    """
    Finds the 'leader' in a given graph, /g/, where the leader is defined as
    the node with the highest PageRank centrality in g.
    """
    pr = nx.pagerank_numpy(g, alpha=alpha, weight=weight)
    leader = max(pr, key=pr.get)
    return leader
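# --- Quick hypothetical check of find_leader (defined above) on a toy graph
# (networkx < 3.0 assumed; node names are made up). Everyone links to "boss",
# so "boss" has the highest PageRank and is returned as the leader.
import networkx as nx

g = nx.DiGraph([("a", "boss"), ("b", "boss"), ("c", "boss"), ("boss", "a")])
assert find_leader(g) == "boss"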
def semantic_graph_rank(self, topics, phrases):
    similar_graph = []
    topics = list(set(topics))
    phrases = list(set(phrases))
    topic_sim = self.similarRank.rank(topics, phrases)
    for item in topic_sim:
        similar_graph.append(('topic', item[0], item[1]))
        similar_graph.append((item[0], 'topic', item[1]))
    for phrase in phrases:
        tmp = copy.deepcopy(phrases)
        tmp.remove(phrase)
        inter_sim = self.similarRank.rank([phrase], tmp)
        for item in inter_sim:
            similar_graph.append((phrase, item[0], item[1]))
            similar_graph.append((item[0], phrase, item[1]))
    graph = nx.DiGraph()
    graph.add_weighted_edges_from(similar_graph)
    scores = nx.pagerank_numpy(graph)
    scores.pop('topic')
    sorted_list = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    max_score = sorted_list[0][1]
    min_score = sorted_list[-1][1]
    if max_score - min_score == 0.0:
        return {item[0]: 1.0 for item in sorted_list}
    else:
        return {
            item[0]: (item[1] - min_score) / (max_score - min_score)
            for item in sorted_list
        }
def get_pagerank_broken(g, only_freqs):
    # pagerank and only_freqs are in a different order
    page_rank = list(nx.pagerank_numpy(g).values())
    print(page_rank)
    print(only_freqs)
    corr, _ = pearsonr(page_rank, only_freqs)
    return corr
def find_abstract(sentences, cutting_model, model_name, limit=3, alpha=0.85):
    abstract_sentences = []
    sentences_num = len(sentences)
    graph = zeros((sentences_num, sentences_num))
    wordlist = []
    for sent in sentences:
        if model_name == 'THU':
            current_sentence_wordcut = cutting_model.cut(sent, text=True)
        elif model_name == 'PKU':
            current_sentence_wordcut = cutting_model.cut(sent)
        wordlist.append(current_sentence_wordcut)
    for x in range(sentences_num):
        for y in range(x, sentences_num):
            similarity = calc_similarity(wordlist[x], wordlist[y])
            graph[x, y] = similarity
            graph[y, x] = similarity
    nx_graph = from_numpy_matrix(graph)
    scores = pagerank_numpy(nx_graph, alpha)
    sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    for index, score in sorted_scores[:limit]:
        item = {
            "sentence_text": sentences[index],
            'score': score,
            'index': index
        }
        abstract_sentences.append(item)
    sorted_abstract = sorted(abstract_sentences, key=lambda x: x['index'], reverse=False)
    abstract = '\n'.join([x['sentence_text'] for x in sorted_abstract])
    return abstract
def summarize_sentences_rec(sentences, stop_words, top_n):
    # def chunks(lst, n):
    #     """Yield successive n-sized chunks from lst."""
    #     for i in range(0, len(lst), n):
    #         yield lst[i:i + n]

    def chunks(a, n):
        k, m = divmod(len(a), n)
        return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))

    def chunk_nr(nr_sentences: int):
        val = math.floor(math.log(nr_sentences, 10))
        if val < 2:
            val = 2
        return val

    if len(sentences) <= 100:
        sentence_similarity_matrix = Summarizer.build_similarity_matrix(sentences, stop_words)
        sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
        scores = nx.pagerank_numpy(sentence_similarity_graph)
        ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
        candidates = [ranked_sent[1] for ranked_sent in ranked_sentence]
        return candidates[:top_n]
    else:
        chunked_sentences = chunks(sentences, chunk_nr(len(sentences)))
        summarized_sentences = []
        for chunk in chunked_sentences:
            summarized_chunk_sents = Summarizer.summarize_sentences_rec(chunk, stop_words, top_n)
            # print('>>>', len(summarized_chunk_sents))
            summarized_sentences.extend(summarized_chunk_sents)
        # print('>>', len(summarized_sentences))
        summarized_sentences = Summarizer.summarize_sentences_rec(summarized_sentences, stop_words, top_n)
        # print('>', len(summarized_sentences))
        # print(summarized_sentences)
        return summarized_sentences[:top_n]
def TargPagerank(self):
    '''
    returns a 2d array containing the pagerank of the target node for all edges
    '''
    probas = np.dot(
        np.ones((self.number_of_nodes(), 1)),
        np.array(list(nx.pagerank_numpy(self).values()), dtype=float).reshape(1, -1))
    return probas
def correlation_centrality(G):
    cor = pd.DataFrame.from_records([
        nx.pagerank_numpy(G, weight="weight"),
        nx.betweenness_centrality(G, weight="weight_inv"),
        weighted_degree(G, "weight"),
        nx.degree_centrality(G),
    ])
    return cor.T.corr()
def TargPagerank(self):
    '''
    returns a 2d array containing the pagerank of the target node for all edges
    '''
    probas = np.dot(
        np.ones((self.number_of_nodes(), 1)),
        np.array(list(nx.pagerank_numpy(self).values()), dtype=float).reshape(1, -1))
    return probas
def parse_nci(graph_name='nci1.graph', with_structural_features=False):
    path = "%s/data/nci/" % (current_dir,)

    if graph_name == 'nci1.graph':
        maxval = 37
    elif graph_name == 'nci109.graph':
        maxval = 38

    with open(path + graph_name, 'r') as f:
        raw = cp.load(f)

    n_classes = 2
    n_graphs = len(raw['graph'])

    A = []
    rX = []
    Y = np.zeros((n_graphs, n_classes), dtype='int32')

    for i in range(n_graphs):
        # Set label
        Y[i][raw['labels'][i]] = 1

        # Parse graph
        G = raw['graph'][i]
        n_nodes = len(G)

        a = np.zeros((n_nodes, n_nodes), dtype='float32')
        x = np.zeros((n_nodes, maxval), dtype='float32')

        for node, meta in G.items():
            x[node, meta['label'][0] - 1] = 1
            for neighbor in meta['neighbors']:
                a[node, neighbor] = 1

        A.append(a)
        rX.append(x)

    if with_structural_features:
        import networkx as nx

        for i in range(len(rX)):
            struct_feat = np.zeros((rX[i].shape[0], 3))
            # degree
            struct_feat[:, 0] = A[i].sum(1)

            G = nx.from_numpy_matrix(A[i])
            # pagerank
            prank = nx.pagerank_numpy(G)
            struct_feat[:, 1] = np.asarray([prank[k] for k in range(A[i].shape[0])])
            # clustering
            clust = nx.clustering(G)
            struct_feat[:, 2] = np.asarray([clust[k] for k in range(A[i].shape[0])])

            rX[i] = np.hstack((rX[i], struct_feat))

    return A, rX, Y
def test_empty(self):
    try:
        import numpy
    except ImportError:
        raise SkipTest("numpy not available.")
    G = networkx.Graph()
    assert_equal(networkx.pagerank(G), {})
    assert_equal(networkx.pagerank_numpy(G), {})
    assert_equal(networkx.google_matrix(G).shape, (0, 0))
def get_pagerank(self, damping_factor=0.85):
    """ Computes normalized page rank of current graph """
    pagerank = nx.pagerank_numpy(self.graph.graph, alpha=damping_factor)
    # build a numeric vector from the score dict, then L2-normalize it
    vals = np.array(list(pagerank.values()))
    vals = vals / npl.norm(vals)
    return vals
def test_numpy_pagerank(self):
    try:
        import numpy
    except ImportError:
        raise SkipTest('numpy not available.')
    G = self.G
    p = networkx.pagerank_numpy(G, alpha=0.9)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
def mypagerank(G):
    dd = nx.pagerank_numpy(G)
    d = []
    for nd in G.nodes():
        d += [dd[nd]]
    avgpr = np.average(d)
    stdpr = np.std(d)
    fatpr = fatness(d)
    return [stdpr, fatpr]
def test_numpy_pagerank(self):
    try:
        import numpy
    except ImportError:
        raise SkipTest('numpy not available.')
    G = self.G
    p = networkx.pagerank_numpy(G, alpha=0.9, tol=1.e-08)
    for (a, b) in zip(p, self.G.pagerank):
        assert_almost_equal(a, b)
def test_numpy_pagerank(self):
    G = self.G
    try:
        p = networkx.pagerank_numpy(G, alpha=0.9, tol=1.e-08)
        for (a, b) in zip(p, self.G.pagerank):
            assert_almost_equal(a, b)
    except ImportError:
        print("Skipping pagerank_numpy test")
def calculate(network):
    try:
        n = nx.pagerank_numpy(network)
    except:
        return 0
    if len(n.values()) == 0:
        return 0
    else:
        return round(sum(n.values()) / len(n.values()), 7)
def get_pagerank_values(self):
    """
    dictionary of pagerank values for the nodes of the graph

    :returns: pagerank calculation
    :rtype: dict
    """
    return nx.pagerank_numpy(self)
def centrailtyM(A, num=5):
    G = nx.DiGraph(A)
    ranks = np.zeros((num, 8))
    # list() so each dict view converts to a numeric array under Python 3
    ranks[:, 0] = np.argsort(list(nx.in_degree_centrality(G).values()))[::-1][:num]
    ranks[:, 1] = np.argsort(list(nx.closeness_centrality(G).values()))[::-1][:num]
    ranks[:, 2] = np.argsort(list(nx.betweenness_centrality(G).values()))[::-1][:num]
    ranks[:, 3] = np.argsort(list(nx.eigenvector_centrality_numpy(G).values()))[::-1][:num]
    ranks[:, 4] = np.argsort(list(nx.katz_centrality_numpy(G, weight=None).values()))[::-1][:num]
    ranks[:, 5] = np.argsort(list(nx.pagerank_numpy(G, weight=None).values()))[::-1][:num]
    return ranks
def pagerank_list(idcm, labels):
    """
    Takes an internal directed cite matrix and returns a sorted list of the rows by pagerank
    """
    g = nx.DiGraph(idcm)
    pr = nx.pagerank_numpy(g)
    l = list(pr.items())
    # now l is a list of (index, pagerank)
    l.sort(key=lambda x: x[1], reverse=True)
    return [(x[0], labels[x[0]], x[1]) for x in l]
def main():
    # read the graph using tab delimiter as a directed graph
    G = nx.read_adjlist("sample-tiny.txt", delimiter='\t', create_using=nx.DiGraph())
    for n in G.nodes():
        print(n, G.predecessors(n))
    # nx.write_dot(G, "sample-large2.dot")
    # for i in range(0,100,10):
    # pr = nx.pagerank_numpy(G, alpha=0.85)  # ,max_iter=100)
    pr_numpy = nx.pagerank_numpy(G)
    # pr_scipy = nx.pagerank_scipy(G)
    # print "\n\t", pr
    print("pagerank numpy:", pr_numpy)
def run_metric(metric_name, G, domain, topic, metric_weight, use_norm, fileout, top_x):
    print('\n>> ' + 'Calculating ' + metric_name + ' for ' + domain + " - " + topic)
    start_time = datetime.now()

    if metric_name == 'Degree':
        graph_metric = G.degree(nbunch=None, weight=metric_weight)
        normalize_metric(G, graph_metric, metric_weight)
    elif metric_name == 'In Degree':
        graph_metric = G.in_degree(nbunch=None, weight=metric_weight)
        normalize_metric(G, graph_metric, metric_weight)
    elif metric_name == 'Out Degree':
        graph_metric = G.out_degree(nbunch=None, weight=metric_weight)
        normalize_metric(G, graph_metric, metric_weight)
    elif metric_name == 'Closeness Centrality':
        graph_metric = nx.closeness_centrality(G, distance=None, normalized=use_norm)
        # use distance as weight? to increase importance as weight increases, distance = 1/weight
    elif metric_name == 'Betweenness Centrality':
        graph_metric = nx.betweenness_centrality(G, normalized=use_norm, weight=metric_weight)
    elif metric_name == 'Eigenvector Centrality':
        try:
            graph_metric = nx.eigenvector_centrality(G, max_iter=1000)
            normalize_metric(G, graph_metric, metric_weight)
        except nx.exception.NetworkXError:
            # use numpy eigenvector if it fails to converge
            print("power method for calculating eigenvector did not converge, using numpy")
            graph_metric = nx.eigenvector_centrality_numpy(G)
            normalize_metric(G, graph_metric, metric_weight)
    elif metric_name == 'Pagerank':
        try:
            graph_metric = nx.pagerank(G, weight=metric_weight)
            normalize_metric(G, graph_metric, metric_weight)
        except:
            # use numpy if it fails to converge
            print("power method for calculating pagerank did not converge, using numpy")
            graph_metric = nx.pagerank_numpy(G, weight=metric_weight)
            normalize_metric(G, graph_metric, metric_weight)

    end_time = datetime.now()
    print("Calculation completed in: " + str(end_time - start_time))

    # append the entire list to the output file
    append_to_file(graph_metric, fileout, domain, topic, metric_name)

    '''
    ### output to screen the top x results
    # convert to a list of tuples
    graph_metric = graph_metric.items()
    # sort
    graph_metric.sort(key=lambda tup: -tup[1])
    # get and print the top X
    # print metric_results(graph_metric)
    top_list = take(top_x, graph_metric)
    for item in top_list:
        print((item[0]) + "," + str(item[1]))
    '''
    return graph_metric
def pagerank(self):
    # Compute the page rank of the graph
    logging.info("Inside pagerank module")
    pagerank_dict = nx.pagerank_numpy(self.G)
    logging.info("Page rank dict length is %s" % (len(pagerank_dict.values())))
    pagerank_sorted_list = sorted(pagerank_dict.items(), key=lambda x: x[1], reverse=True)[:3]
    for a, b in pagerank_sorted_list:
        logging.info("Page rank cent for %s is %s" % (a, b))
    pagerank_dict = {}
def get_pagerank(self):
    '''Create a co-sponsorship digraph based on the information from
    the Open States API and calculate the pagerank of each legislator.
    '''
    ids = set()
    G = networkx.DiGraph()
    number_of_bills = 0
    for bill in self.bills:
        sponsors = bill['sponsors']
        # if len(sponsors) < 2:
        #     continue

        # Separate sponsors into primary, secondary.
        primary = []
        secondary = []
        for sponsor in sponsors:
            if sponsor['leg_id'] is None:
                continue
            if sponsor['type'] == 'primary':
                primary.append(sponsor['leg_id'])
            else:
                secondary.append(sponsor['leg_id'])
            ids.add(sponsor['leg_id'])

        # Add them to the network.
        if primary and secondary:
            for primary, secondary in product(primary, secondary):
                try:
                    G[secondary][primary]['weight'] += 1
                except KeyError:
                    G.add_edge(secondary, primary, weight=1)
        elif primary:
            for edge in combinations(primary, r=2):
                for p1, p2 in [edge, edge[::-1]]:
                    try:
                        G[p1][p2]['weight'] += 1
                    except KeyError:
                        G.add_edge(p1, p2, weight=1)

    if not G.nodes():
        # Known offenders: CO, AR, CT, ID, and others.
        # Return all zeroes.
        # return dict.fromkeys(ids, 0)
        data = dict(abbr=self.abbr, chamber=self.chamber)
        msg = ("Can't generate PageRank scores due to lack of secondary "
               "sponsorship data: %r.")
        raise DataQualityError(msg % (data,))

    return networkx.pagerank_numpy(G)
def linkrank(G):
    c = arange(len(G.nodes()))
    goo = nx.google_matrix(G)
    goo = array(goo)
    m = nx.pagerank_numpy(G)
    m = m.items()
    m = [i[1] for i in m]
    m = array([m])
    m = m.T
    L = tile(m, [1, len(goo)]) * goo
    Q = 0
    mm = tile(m, [1, len(goo)]) * tile(m.T, [len(goo), 1])
    Qlr = L - mm
    return greedyMax(Qlr, c, 0)
def lexR(self, graph):
    """
    Compute the LexRank of the sentences.

    LexRank of a sentence in the sentence graph is the PageRank of the node
    representing the sentence. It is a measure of the importance and influence
    of the sentence in the corpus.

    Arguments:
        graph - A networkx graph or digraph.

    Returns:
        A dictionary of all the nodes with their PageRank scores.
    """
    pr = nx.pagerank_numpy(graph, alpha=0.85)
    return pr
def _anchored_pagerank(graph, anchors, normalize=False):
    n = len(graph)
    pgranks = dict((node, {}) for node in graph.nodes())
    for anchor in anchors:
        weights = dict((i, 0) for i in graph.nodes())
        weights[anchor] = 1
        pgr = nx.pagerank_numpy(graph, personalization=weights)
        for node, v in pgr.items():
            pgranks[node][anchor] = v
    for node, pgr in pgranks.items():
        pgranks[node] = d = np.array([pgr[a] for a in anchors])
        if normalize:
            pgranks[node] = normalized(d)
            # d[n] = normalized_dict(pgr)
    return pgranks
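# --- Hypothetical call of the helper above (networkx < 3.0 assumed; the toy
# graph is made up): every node receives a feature vector of its
# personalized-PageRank scores as seen from each anchor, so nodes close to an
# anchor get a large entry for it.
import networkx as nx

g = nx.path_graph(5)
feats = _anchored_pagerank(g, anchors=[0, 4])
print(feats[0])  # high score from anchor 0, low score from anchor 4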