Ejemplo n.º 1
0
def PPR(A, edge_index):
    """Score every edge with the Personalized PageRank (PPR) heuristic.

    The edges are sorted by source node so that edges sharing a source are
    contiguous.  PageRank is then run once per distinct source, with the
    personalization vector set to 1 at that source and 0 elsewhere, and the
    PageRank value at each destination node becomes the score of that edge.

    Requires the third-party package ``fast_pagerank``
    (``pip install fast-pagerank``).  Too slow for large datasets for now.

    Args:
        A: Adjacency matrix passed to ``pagerank_power``; ``A.shape[0]`` is
            taken as the number of nodes.
        edge_index: Tensor indexed as ``edge_index[0]`` (source nodes) and
            ``edge_index[1]`` (destination nodes), one column per edge.

    Returns:
        A tuple ``(scores, edge_index)`` where ``scores`` is a float tensor
        with one entry per edge and ``edge_index`` is the input re-ordered by
        ascending source node (the order the scores refer to).
    """
    num_nodes = A.shape[0]
    src_index, sort_indices = torch.sort(edge_index[0])
    dst_index = edge_index[1, sort_indices]
    edge_index = torch.stack([src_index, dst_index])
    num_edges = edge_index.shape[1]

    if num_edges == 0:
        # Nothing to score; np.concatenate would raise on an empty list.
        return torch.empty(0, dtype=torch.float32), edge_index

    # Imported lazily so the empty-input path above does not need the
    # optional dependency.
    from fast_pagerank import pagerank_power

    scores = []
    group_end = 0  # first edge position that has not been scored yet
    for i in tqdm(range(num_edges)):
        if i < group_end:
            # This edge belongs to a source that was already processed.
            continue
        src = edge_index[0, i]
        personalize = np.zeros(num_nodes)
        personalize[src] = 1
        ppr = pagerank_power(A, p=0.85, personalize=personalize, tol=1e-7)

        # Advance to the end of the run of edges that share this source.
        group_end = i + 1
        while group_end < num_edges and edge_index[0, group_end] == src:
            group_end += 1

        all_dst = edge_index[1, i:group_end]
        scores.append(np.atleast_1d(ppr[all_dst]))

    return torch.FloatTensor(np.concatenate(scores, 0)), edge_index
Ejemplo n.º 2
0
def PageRank_with_personalised_vector(Network, Init, PATH_TO_DATA):
    """Compare paired PageRank scores and return a +1/-1 label per pair.

    ``n`` is the number of regular files directly inside ``PATH_TO_DATA``.
    Personalized PageRank is computed on ``Network`` and, for each ``v`` in
    ``range(n)``, the score at index ``v`` is compared with the score at
    index ``v + n``.

    Args:
        Network: Graph matrix passed to ``pagerank_power``.
        Init: Personalization vector passed to ``pagerank_power``.
        PATH_TO_DATA: Directory whose file count determines ``n``.

    Returns:
        A float NumPy array of length ``n`` holding ``+1`` where
        ``pagerank[v] > pagerank[v + n]`` and ``-1`` otherwise.

    Raises:
        IndexError: If the PageRank vector has fewer than ``2 * n`` entries.
    """
    n = sum(
        1 for f in os.listdir(PATH_TO_DATA)
        if os.path.isfile(os.path.join(PATH_TO_DATA, f))
    )
    pagerank = pagerank_power(Network, p=0.85, personalize=Init)

    # Index with explicit integer arrays (not slices) so that an out-of-range
    # position still raises IndexError instead of being silently truncated.
    first_half = np.arange(n)
    diff = pagerank[first_half] - pagerank[first_half + n]

    # Vectorised replacement for growing the result with np.insert, which
    # copied the whole array on every iteration.
    return np.where(diff > 0, 1.0, -1.0)
Ejemplo n.º 3
0
def compute_pagerank(graph, id2synset, f):
    """Write the top personalized-PageRank neighbours of every synset to ``f``.

    For each ``(synset_id, synset)`` in ``id2synset`` a personalized PageRank
    is run on ``graph`` with all mass on ``synset_id``.  The 100 highest
    scoring entries are kept, those with a score of zero or less are dropped,
    and one line ``"<synset name> <name>=<score> ..."`` is written to ``f``.
    Progress is printed every 100 synsets.

    Args:
        graph: Graph matrix passed to ``pagerank_power``.
        id2synset: Mapping from integer id to an object exposing ``name()``.
        f: Writable text stream receiving one line per synset.
    """
    total = len(id2synset)
    for position, (synset_id, synset) in enumerate(id2synset.items()):
        if not position % 100:
            print('  {}/{}'.format(position, total))

        # One-hot restart distribution centred on the current synset.
        restart = np.zeros((total, ))
        restart[synset_id] = 1.0
        ranks = pagerank_power(graph, p=0.85, personalize=restart)

        named_ranks = []
        for node_id, value in enumerate(ranks):
            named_ranks.append((id2synset[node_id].name(), value))
        named_ranks.sort(key=lambda pair: pair[1], reverse=True)

        tokens = []
        for label, value in named_ranks[:100]:
            if value > 0.:
                tokens.append('{}={}'.format(label, value))

        f.write('{} {}\n'.format(synset.name(), ' '.join(tokens)))
Ejemplo n.º 4
0
def pagerank_fast_mat(M, personal_vec, max_vectors=25000):
    """Compute one personalized PageRank vector per column of ``personal_vec``.

    ``M`` is first row-normalised: every row with a non-zero sum is divided
    by that sum, rows that sum to zero are left untouched.

    Args:
        M: Square matrix (sparse or dense) supporting ``sum(axis=1)`` and
            ``shape``.
        personal_vec: 2-D array; column ``i`` is used as the personalization
            vector of the ``i``-th run.
        max_vectors: Upper bound on the number of columns processed.  The
            default of 25000 matches the previously hard-coded loop length.

    Returns:
        ``np.array`` built from the list of PageRank vectors, one per
        processed column (empty when no column is processed).
    """
    # Force a float dtype: with an integer matrix the reciprocals assigned
    # below would otherwise be truncated to 0.
    row_sums = np.asarray(M.sum(axis=1), dtype=float).flatten()
    nonzero = row_sums != 0
    row_sums[nonzero] = 1.0 / row_sums[nonzero]
    Q = scipy.sparse.spdiags(row_sums, 0, *M.shape, format='csr')
    M = Q @ M

    # Never index past the last available column.
    num_vectors = min(max_vectors, personal_vec.shape[1])
    ppr_mat = [
        pagerank_power(M, p=0.85, personalize=personal_vec[:, i], tol=1e-6)
        for i in range(num_vectors)
    ]
    return np.array(ppr_mat)
Ejemplo n.º 5
0
def pagerank_fast(G, personal_vec):
    """Compute one personalized PageRank vector per column of ``personal_vec``.

    The graph is converted to a sparse float adjacency matrix using the
    ``'weight'`` edge attribute, row-normalised (rows that sum to zero are
    left untouched) and passed to ``pagerank_power`` once per column.

    Args:
        G: NetworkX graph.
        personal_vec: 2-D array; column ``i`` is used as the personalization
            vector of the ``i``-th run.

    Returns:
        ``np.array`` built from the list of PageRank vectors, one per column
        of ``personal_vec``.
    """
    # NOTE(review): to_scipy_sparse_matrix is believed to be unavailable in
    # newer NetworkX releases - confirm against the pinned version.
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=G.nodes(),
                                  weight='weight',
                                  dtype=float)

    # np.asarray replaces the deprecated scipy.array alias.
    row_sums = np.asarray(M.sum(axis=1)).flatten()
    nonzero = row_sums != 0
    row_sums[nonzero] = 1.0 / row_sums[nonzero]
    Q = scipy.sparse.spdiags(row_sums, 0, *M.shape, format='csr')
    M = Q * M

    ppr_mat = [
        pagerank_power(M, p=0.85, personalize=personal_vec[:, i], tol=1e-6)
        for i in range(personal_vec.shape[1])
    ]
    return np.array(ppr_mat)
Ejemplo n.º 6
0
 def score(self, data):
     """Return a ``{node name: PageRank value}`` mapping for ``data``.

     ``data['node']`` is a list of dicts with a ``'name'`` key (and an
     optional ``'weight'``); ``data['edge']`` is a list of dicts whose
     ``'node'`` entry holds the two endpoint names and whose ``'weight'``
     entry is the edge weight.  Node weights are used as the
     personalization vector only when the first node has a truthy
     ``'weight'``.  The solver is chosen by ``self.solver``.
     """
     nodes = data['node']
     links = data['edge']

     # Position of each node name in the node list.
     node_dict = {}
     for position, node in enumerate(nodes):
         node_dict[node['name']] = position
     node_count = len(node_dict)

     endpoint_pairs = []
     for link in links:
         endpoint_pairs.append((node_dict[link['node'][0]],
                                node_dict[link['node'][1]]))
     edges = np.array(endpoint_pairs)
     weights = np.array([link['weight'] for link in links])

     rows = edges[:, 0]
     cols = edges[:, 1]
     G = sparse.csr_matrix((weights, (rows, cols)),
                           shape=(node_count, node_count))

     personalize = (np.array([node['weight'] for node in nodes])
                    if nodes[0].get('weight') else None)

     if self.solver != 'power':
         pr = pagerank(G, p=self.damping_factor, personalize=personalize)
     else:
         pr = pagerank_power(G,
                             p=self.damping_factor,
                             personalize=personalize,
                             tol=self.tol)

     result = {}
     for name, position in node_dict.items():
         result[name] = pr[position]
     return result
Ejemplo n.º 7
0
    sys.exit(0)

# Normalize the edge weights in place so that they sum to 1.
# NOTE(review): assumes W is a float array with a non-zero sum - TODO confirm
total_sum = np.sum(W)
W /= total_sum

# Matrix dimension: one more than the largest node index found in either of
# the first two columns of A.
print("Ranking\t2\tTransforming Adjacency Matrix", flush=True)
N = max(np.amax(A[:, 0]), np.amax(A[:, 1])) + 1

# Build the N x N sparse matrix: entry (A[k, 0], A[k, 1]) carries weight W[k].
G = sparse.csr_matrix((W, (A[:, 0], A[:, 1])), shape=(N, N))

# Run PageRank (power-iteration solver) with damping `alpha` and tolerance `tol`.
print("Ranking\t3\tExecuting Ranking Algorithm", flush=True)
PR = pagerank_power(G, p=alpha, tol=tol)

# Node indices ordered by descending PageRank value; the trailing
# [:len(PR)] slice keeps every entry.
print("Ranking\t4\tSorting Results", flush=True)
sorted_indices = np.argsort(PR)[::-1][:len(PR)]

# Keep only the indices that actually occur in A (as either endpoint);
# indices below N that appear in no edge are dropped.
indices = np.unique(np.concatenate((A[:, 0], A[:, 1])))
sorted_indices = sorted_indices[np.isin(sorted_indices, indices)]

# Write the results to the output file.
print("Ranking\t5\tWriting Results", flush=True)
with open(outfile, 'w', newline='') as csvfile:
    filewriter = csv.writer(csvfile,
                            delimiter='\t',
                            quotechar='"',
 def test_power_empty_graph(self):
     """pagerank_power on the G5 fixture must return an array of size 0."""
     result = pagerank_power(
         self.G5, p=self.p5, personalize=self.personalize5)
     self.assertEqual(result.size, 0)
 def test_power_zero_edge(self):
     """pagerank_power on the G4 fixture must match pr4 within 1e-4."""
     result = pagerank_power(
         self.G4, p=self.p4, personalize=self.personalize4)
     assert_allclose(result, self.pr4, rtol=0, atol=1e-04)
 def test_power_single_edge(self):
     """pagerank_power on the G3 fixture must match pr3 within 1e-4."""
     result = pagerank_power(
         self.G3, p=self.p3, personalize=self.personalize3)
     assert_allclose(result, self.pr3, rtol=0, atol=1e-04)
    def test_power_pagerank_2(self):
        """pagerank_power on the G2 fixture must match pr2 within 1e-4."""
        result = pagerank_power(
            self.G2, p=self.p2, personalize=self.personalize2)
        assert_allclose(result, self.pr2, rtol=0, atol=1e-04)
 def test_power_pagerank_1(self):
     """pagerank_power on the G1 fixture must match pr1 within 1e-4."""
     result = pagerank_power(
         self.G1, p=self.p1, personalize=self.personalize1)
     assert_allclose(result, self.pr1, rtol=0, atol=1e-04)