import numpy as np
import torch
from tqdm import tqdm


def PPR(A, edge_index):
    """Personalized PageRank heuristic score for candidate links.

    Requires fast_pagerank ("pip install fast-pagerank").
    Currently too slow for large datasets.
    """
    from fast_pagerank import pagerank_power
    num_nodes = A.shape[0]
    # Sort candidate edges by source node so each source's PPR
    # vector is computed only once.
    src_index, sort_indices = torch.sort(edge_index[0])
    dst_index = edge_index[1, sort_indices]
    edge_index = torch.stack([src_index, dst_index])
    # edge_index = edge_index[:, :50]
    scores = []
    j = 0
    for i in tqdm(range(edge_index.shape[1])):
        if i < j:  # already scored under the previous source node
            continue
        src = edge_index[0, i]
        personalize = np.zeros(num_nodes)
        personalize[src] = 1
        ppr = pagerank_power(A, p=0.85, personalize=personalize, tol=1e-7)
        # Advance j past every candidate edge sharing this source.
        j = i
        while edge_index[0, j] == src:
            j += 1
            if j == edge_index.shape[1]:
                break
        all_dst = edge_index[1, i:j]
        cur_scores = ppr[all_dst]
        if cur_scores.ndim == 0:
            cur_scores = np.expand_dims(cur_scores, 0)
        scores.append(np.array(cur_scores))
    scores = np.concatenate(scores, 0)
    return torch.FloatTensor(scores), edge_index
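# A minimal usage sketch for PPR() above. The toy graph, candidate edge
# list, and all values here are hypothetical: A is a scipy sparse
# adjacency matrix and edge_index a 2 x num_candidates LongTensor.
import numpy as np
import torch
from scipy.sparse import csr_matrix

rows, cols = np.array([0, 1, 2, 3]), np.array([1, 2, 3, 0])
A = csr_matrix((np.ones(4), (rows, cols)), shape=(4, 4))
edge_index = torch.tensor([[0, 0, 1], [2, 3, 3]])

scores, sorted_edges = PPR(A, edge_index)
print(scores)        # one PPR score per candidate edge
print(sorted_edges)  # candidates re-sorted by source node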
import os

import numpy as np
from fast_pagerank import pagerank_power


def PageRank_with_personalised_vector(Network, Init, PATH_TO_DATA):
    # n is the number of regular files in PATH_TO_DATA.
    n = len([f for f in os.listdir(PATH_TO_DATA)
             if os.path.isfile(os.path.join(PATH_TO_DATA, f))])
    pagerank = pagerank_power(Network, p=0.85, personalize=Init)
    # +1 if node v outranks its counterpart v + n, else -1.
    R = np.array([1.0 if pagerank[v] > pagerank[v + n] else -1.0
                  for v in range(n)])
    return R
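# A minimal usage sketch for the function above, assuming the convention
# visible in the code: the graph has 2n nodes and node v is compared with
# its counterpart v + n. The temp directory, toy graph, and Init vector
# are all hypothetical placeholders.
import os
import tempfile

import numpy as np
from scipy.sparse import csr_matrix

PATH_TO_DATA = tempfile.mkdtemp()
for name in ('a.txt', 'b.txt'):  # n = 2 placeholder files
    open(os.path.join(PATH_TO_DATA, name), 'w').close()

n = 2
Network = csr_matrix((np.ones(4), ([0, 1, 2, 3], [2, 3, 0, 1])),
                     shape=(2 * n, 2 * n))
Init = np.array([1.0, 1.0, 0.0, 0.0])

R = PageRank_with_personalised_vector(Network, Init, PATH_TO_DATA)
print(R)  # +1 where node v outranks node v + n, else -1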
import numpy as np
from fast_pagerank import pagerank_power


def compute_pagerank(graph, id2synset, f):
    """Write the top-100 PPR neighbors of each synset to file f."""
    num_synsets = len(id2synset)
    for i, (synset_id, synset) in enumerate(id2synset.items()):
        if i % 100 == 0:
            print(' {}/{}'.format(i, num_synsets))
        # One-hot personalization on the current synset.
        personalize = np.zeros((num_synsets,))
        personalize[synset_id] = 1.0
        ppr = pagerank_power(graph, p=0.85, personalize=personalize)
        ppr = [(id2synset[s].name(), v) for s, v in enumerate(ppr)]
        sorted_ppr = sorted(ppr, key=lambda kv: kv[1], reverse=True)[:100]
        sorted_ppr = ['{}={}'.format(s, v) for s, v in sorted_ppr if v > 0.]
        line = ' '.join(sorted_ppr)
        f.write('{} {}\n'.format(synset.name(), line))
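# A minimal usage sketch for compute_pagerank() above. Real code would
# pass WordNet synsets (e.g. from nltk); this hypothetical stub provides
# only the .name() method the function actually calls.
import numpy as np
from scipy.sparse import csr_matrix


class FakeSynset:
    def __init__(self, name):
        self._name = name

    def name(self):
        return self._name


id2synset = {i: FakeSynset('synset{}.n.01'.format(i)) for i in range(3)}
graph = csr_matrix((np.ones(3), ([0, 1, 2], [1, 2, 0])), shape=(3, 3))

with open('ppr_neighbors.txt', 'w') as f:  # hypothetical output path
    compute_pagerank(graph, id2synset, f)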
import numpy as np
import scipy.sparse
from fast_pagerank import pagerank_power


def pagerank_fast_mat(M, personal_vec):
    # Row-normalize M so each row of the transition matrix sums to 1.
    S = np.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    ppr_mat = []
    for i in range(25000):  # hard-coded number of personalization vectors
        pr = pagerank_power(M, p=0.85, personalize=personal_vec[:, i],
                            tol=1e-6)
        # pr = pagerank(M, p=0.85, personalize=personal_vec[:, i])
        ppr_mat.append(pr)
    return np.array(ppr_mat)
import networkx as nx
import numpy as np
import scipy.sparse
from fast_pagerank import pagerank_power


def pagerank_fast(G, personal_vec):
    # networkx < 3.0; newer versions use nx.to_scipy_sparse_array.
    M = nx.to_scipy_sparse_matrix(G, nodelist=G.nodes(),
                                  weight='weight', dtype=float)
    # Row-normalize M so each row of the transition matrix sums to 1.
    S = np.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    ppr_mat = []
    for i in range(personal_vec.shape[1]):
        pr = pagerank_power(M, p=0.85, personalize=personal_vec[:, i],
                            tol=1e-6)
        # pr = pagerank(M, p=0.85, personalize=personal_vec[:, i])
        ppr_mat.append(pr)
    return np.array(ppr_mat)
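# A minimal usage sketch for pagerank_fast() above (the same input shape
# also fits pagerank_fast_mat, minus the hard-coded 25000). The toy graph
# is hypothetical; each column of personal_vec is one personalization
# vector, so an identity matrix yields the full PPR matrix.
import networkx as nx
import numpy as np

G = nx.DiGraph()
G.add_weighted_edges_from([(0, 1, 1.0), (1, 2, 2.0), (2, 0, 1.0)])
personal_vec = np.eye(G.number_of_nodes())

ppr_mat = pagerank_fast(G, personal_vec)
print(ppr_mat.shape)  # (num_personalizations, num_nodes)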
import numpy as np
from scipy import sparse
from fast_pagerank import pagerank, pagerank_power


def score(self, data):
    node_dict = {node['name']: i for i, node in enumerate(data['node'])}
    node_count = len(node_dict)
    edges = np.array([(node_dict[edge['node'][0]], node_dict[edge['node'][1]])
                      for edge in data['edge']])
    weights = np.array([edge['weight'] for edge in data['edge']])
    G = sparse.csr_matrix((weights, (edges[:, 0], edges[:, 1])),
                          shape=(node_count, node_count))
    # Use per-node weights as the personalization vector when present.
    if data['node'][0].get('weight'):
        personalize = np.array([node['weight'] for node in data['node']])
    else:
        personalize = None
    if self.solver == 'power':
        pr = pagerank_power(G, p=self.damping_factor,
                            personalize=personalize, tol=self.tol)
    else:
        pr = pagerank(G, p=self.damping_factor, personalize=personalize)
    return {k: pr[v] for k, v in node_dict.items()}
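# A minimal usage sketch for score() above. The host class is not shown
# in the snippet, so PageRankScorer is a hypothetical stand-in exposing
# the three attributes score() reads: solver, damping_factor, and tol.
class PageRankScorer:
    def __init__(self, solver='power', damping_factor=0.85, tol=1e-6):
        self.solver = solver
        self.damping_factor = damping_factor
        self.tol = tol


PageRankScorer.score = score  # attach the function defined above

data = {
    'node': [{'name': 'a'}, {'name': 'b'}, {'name': 'c'}],
    'edge': [
        {'node': ('a', 'b'), 'weight': 1.0},
        {'node': ('b', 'c'), 'weight': 2.0},
        {'node': ('c', 'a'), 'weight': 1.0},
    ],
}
print(PageRankScorer().score(data))  # {'a': ..., 'b': ..., 'c': ...}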
sys.exit(0)

# normalize edge weights
total_sum = np.sum(W)
W /= total_sum

# calculate max dimension
print("Ranking\t2\tTransforming Adjacency Matrix", flush=True)
N = max(np.amax(A[:, 0]), np.amax(A[:, 1])) + 1

# transform to sparse matrix representation
G = sparse.csr_matrix((W, (A[:, 0], A[:, 1])), shape=(N, N))

# run pagerank
print("Ranking\t3\tExecuting Ranking Algorithm", flush=True)
PR = pagerank_power(G, p=alpha, tol=tol)

# sort results by descending score
print("Ranking\t4\tSorting Results", flush=True)
sorted_indices = np.argsort(PR)[::-1]

# filter out indices that are not in A
indices = np.unique(np.concatenate((A[:, 0], A[:, 1])))
sorted_indices = sorted_indices[np.isin(sorted_indices, indices)]

# write results to output file
print("Ranking\t5\tWriting Results", flush=True)
with open(outfile, 'w', newline='') as csvfile:
    filewriter = csv.writer(csvfile, delimiter='\t', quotechar='"',
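# The fragment above is truncated and relies on names defined earlier in
# the script; this hypothetical sketch only illustrates inputs with the
# expected shapes: A is a (num_edges, 2) integer array of (source, target)
# node ids, W the parallel array of edge weights.
import numpy as np

A = np.array([[0, 1], [1, 2], [2, 0]])
W = np.array([1.0, 2.0, 1.0])
alpha, tol = 0.85, 1e-6
outfile = 'ranking.tsv'  # hypothetical output path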
def test_power_empty_graph(self):
    calculated_pagerank = pagerank_power(self.G5, p=self.p5,
                                         personalize=self.personalize5)
    self.assertEqual(calculated_pagerank.size, 0)

def test_power_zero_edge(self):
    calculated_pagerank = pagerank_power(self.G4, p=self.p4,
                                         personalize=self.personalize4)
    assert_allclose(calculated_pagerank, self.pr4, rtol=0, atol=1e-04)

def test_power_single_edge(self):
    calculated_pagerank = pagerank_power(self.G3, p=self.p3,
                                         personalize=self.personalize3)
    assert_allclose(calculated_pagerank, self.pr3, rtol=0, atol=1e-04)

def test_power_pagerank_2(self):
    calculated_pagerank = pagerank_power(self.G2, p=self.p2,
                                         personalize=self.personalize2)
    assert_allclose(calculated_pagerank, self.pr2, rtol=0, atol=1e-04)

def test_power_pagerank_1(self):
    calculated_pagerank = pagerank_power(self.G1, p=self.p1,
                                         personalize=self.personalize1)
    assert_allclose(calculated_pagerank, self.pr1, rtol=0, atol=1e-04)
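# The fixtures (self.G1, self.p1, self.personalize1, self.pr1, ...) come
# from a setUp not shown in the snippets. A hypothetical sketch of one
# such fixture, with the expected value cross-checked against networkx:
import unittest

import networkx as nx
import numpy as np
from numpy.testing import assert_allclose
from scipy import sparse
from fast_pagerank import pagerank_power


class TestPagerankPower(unittest.TestCase):
    def setUp(self):
        edges = [(0, 1, 1.0), (1, 2, 2.0), (2, 0, 1.0)]
        src, dst, w = zip(*edges)
        self.G1 = sparse.csr_matrix((w, (src, dst)), shape=(3, 3))
        self.p1 = 0.85
        self.personalize1 = np.ones(3)  # uniform personalization
        nxg = nx.DiGraph()
        nxg.add_weighted_edges_from(edges)
        pr = nx.pagerank(nxg, alpha=self.p1)
        self.pr1 = np.array([pr[v] for v in sorted(pr)])

    def test_power_pagerank_1(self):
        calculated_pagerank = pagerank_power(self.G1, p=self.p1,
                                             personalize=self.personalize1)
        assert_allclose(calculated_pagerank, self.pr1, rtol=0, atol=1e-04)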