예제 #1
0
    def compute_rank(self, file_name):
        """Rank the destinations with PageRank and write two SVG drawings.

        A square sparse matrix is built from ``self.v`` (values),
        ``self.b`` (row indices) and ``self.a`` (column indices), with one
        row/column per entry of ``self.destin_idx``. It is multiplied
        element-wise with its transpose, and the result is used as the
        adjacency matrix for:

        * a PageRank-scored graph drawing, written to ``file_name``;
        * a Paris dendrogram (5 clusters, rotated), written to a file in the
          same directory whose base name is prefixed with ``dento_``.

        Intermediate values are echoed to stdout with ``print``.

        Parameters
        ----------
        file_name : str
            Path of the SVG file that receives the graph drawing.
        """
        import os

        size = len(self.destin_idx)
        x = csr_matrix((self.v, (self.b, self.a)),
                       shape=(size, size),
                       dtype=float)
        print(x)
        # Entry (i, j) becomes x[i, j] * x[j, i], so the product is symmetric
        # and non-zero only where both directions are present.
        adjacency = x.multiply(x.transpose())
        pagerank = PageRank()
        scores = pagerank.fit_transform(adjacency)
        image = svg_graph(adjacency,
                          names=self.destin_names,
                          scores=scores,
                          display_node_weight=True,
                          node_order=np.argsort(scores))
        # Explicit UTF-8 so names outside the platform's default code page
        # can be written.
        with open(file_name, "w", encoding="utf-8") as text_file:
            print(file_name)
            print(scores)
            text_file.write(image)

        print(self.v)
        print(self.destin_names)

        paris = Paris()
        dendrogram = paris.fit_transform(adjacency)

        image = svg_dendrogram(dendrogram,
                               self.destin_names,
                               n_clusters=5,
                               rotate=True)
        # Prefix only the base name: prefixing the whole path would turn
        # "out/graph.svg" into "dento_out/graph.svg", i.e. another directory.
        directory, base_name = os.path.split(file_name)
        dendrogram_path = os.path.join(directory, "dento_" + base_name)
        with open(dendrogram_path, "w", encoding="utf-8") as text_file:
            text_file.write(image)
예제 #2
0
def pagerank_scikit(G, sim_mat, user_idx, alpha, beta):
    """Compute one personalized PageRank vector per entry of ``user_idx``.

    The weighted adjacency matrix of ``G`` is row-normalized, blended with
    ``sim_mat`` as ``beta * M + (1 - beta) * sim_mat``, and PageRank is then
    run once per user with that user as the only seed.

    Parameters
    ----------
    G :
        Graph passed to ``nx.to_scipy_sparse_matrix``; edge weights are read
        from the ``'weight'`` attribute.
    sim_mat :
        Matrix added to the normalized adjacency; presumably the same shape
        as the adjacency matrix (verify against the caller).
    user_idx :
        Sized iterable of node indices used as seeds, one run per entry.
    alpha :
        Passed to ``PageRank`` as ``damping_factor``.
    beta :
        Weight of the normalized adjacency in the blend.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the per-seed results, in the order of ``user_idx``.
    """
    nodelist = G.nodes()
    # NOTE(review): to_scipy_sparse_matrix is not available in NetworkX 3.x;
    # confirm the pinned version. The `Q * M` product below relies on sparse
    # *matrix* semantics, so the call is left unchanged here.
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=nodelist,
                                  weight='weight',
                                  dtype=float)

    # Row-normalize: scale each row with a non-zero sum by 1 / (row sum).
    # np.asarray replaces the deprecated `scipy.array` NumPy alias.
    S = np.asarray(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S, 0, *M.shape, format='csr')
    M = Q * M
    M = beta * M + (1 - beta) * sim_mat

    pagerank = PageRank(damping_factor=alpha)

    n_users = len(user_idx)
    # At least 1, otherwise fewer than three users gives a modulo by zero.
    print_every = max(1, n_users // 3)
    ppr_mat = []
    s = time.time()
    # Progress is counted in processed seeds, not in node ids, so it stays
    # meaningful when user_idx is not 0..n-1.
    for done, i in enumerate(user_idx, start=1):
        ppr_mat.append(pagerank.fit_transform(M, {i: 1}))
        if done % print_every == 0:
            print('{}% {}sec'.format(done / n_users * 100, time.time() - s))

    return np.array(ppr_mat)
예제 #3
0
 def __init__(self,
              damping_factor: float = 0.85,
              solver: str = 'naive',
              n_iter: int = 10,
              tol: float = 0.,
              n_jobs: Optional[int] = None,
              verbose: bool = False):
     """Create the underlying PageRank ranker and initialize the parent.

     ``damping_factor``, ``solver``, ``n_iter`` and ``tol`` are forwarded
     positionally, in that order, to ``PageRank``. The resulting object is
     handed to the parent initializer together with ``n_jobs`` and
     ``verbose``.
     """
     ranker = PageRank(damping_factor, solver, n_iter, tol)
     super(PageRankClassifier, self).__init__(ranker, n_jobs, verbose)
예제 #4
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Union[np.ndarray, dict]) -> 'MultiRank':
        """Compute personalized PageRank using each given label as seed set.

        One PageRank run is made per distinct label, personalized on the
        nodes carrying that label. The per-label scores are stacked as
        columns and each row with a positive sum is normalized to sum to 1.
        The result is stored in ``self.membership_``.

        Parameters
        ----------
        adjacency:
            Adjacency matrix of the graph.
        seeds: Dict or ndarray,
            If dict, ``(key, val)`` indicates that node ``key`` has label ``val``.
            If ndarray, ``seeds[i] = val`` indicates that node ``i`` has label ``val``.
            Negative values are treated as no label.

        Returns
        -------
        self: :class:`MultiRank`

        """
        ranker_class = BiPageRank if isinstance(self, BiMultiRank) else PageRank
        pr = ranker_class(self.damping_factor, self.solver)

        seeds_labels = check_seeds(seeds, adjacency)
        classes, n_classes = check_labels(seeds_labels)

        n: int = adjacency.shape[0]
        # One 0/1 indicator vector per label: 1 where the node has that label.
        personalizations = [np.array(seeds_labels == label).astype(int)
                            for label in classes]

        if self.n_jobs != 1:
            local_function = partial(pr.fit_transform, adjacency)
            with Pool(self.n_jobs) as pool:
                scores = pool.map(local_function, personalizations)
            # Keep only the first n rows, exactly as the sequential branch
            # does, so the shape of membership_ does not depend on n_jobs.
            membership = np.array(scores).T[:n]
        else:
            membership = np.zeros((n, n_classes))
            for i in range(n_classes):
                membership[:, i] = pr.fit_transform(
                    adjacency, personalization=personalizations[i])[:n]

        # Normalize each row with a positive total; all-zero rows stay zero.
        norm = np.sum(membership, axis=1)
        membership[norm > 0] /= norm[norm > 0, np.newaxis]

        self.membership_ = membership
        return self
예제 #5
0
def pagerank_scikit(G):
    """Compute a personalized PageRank vector for every node of ``G``.

    The weighted adjacency matrix of ``G`` (edge attribute ``'weight'``) is
    row-normalized, then PageRank is run once per row index with that index
    as the only seed.

    Parameters
    ----------
    G :
        Graph passed to ``nx.to_scipy_sparse_matrix``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the per-seed results, ordered by seed index.
    """
    # NOTE(review): to_scipy_sparse_matrix is not available in NetworkX 3.x;
    # confirm the pinned version. The `Q * M` product below relies on sparse
    # *matrix* semantics, so the call is left unchanged here.
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=G.nodes(),
                                  weight='weight',
                                  dtype=float)

    # Row-normalize: scale each row with a non-zero sum by 1 / (row sum).
    # np.asarray replaces the deprecated `scipy.array` NumPy alias.
    S = np.asarray(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S, 0, *M.shape, format='csr')
    M = Q * M

    pagerank = PageRank()
    return np.array([pagerank.fit_transform(M, {i: 1})
                     for i in range(M.shape[0])])
예제 #6
0
 def __init__(self,
              algo=None,
              damping_factor=0.85,
              solver='naive',
              n_iter=10,
              tol=0,
              undirected=False,
              method='exact'):
     """Select the scoring algorithm and remember the ``undirected`` flag.

     ``algo`` chooses what is stored in ``self.scorer``:

     * ``'diffusion'`` -> ``Diffusion(n_iter=n_iter)``
     * ``'closeness'`` -> ``Closeness(method=method, tol=tol)``
     * ``'harmonic'``  -> ``Harmonic()``
     * anything else, including the default ``None`` -> ``PageRank`` built
       from ``damping_factor``, ``solver``, ``n_iter`` and ``tol``.
     """
     if algo == 'diffusion':
         chosen = Diffusion(n_iter=n_iter)
     elif algo == 'closeness':
         chosen = Closeness(method=method, tol=tol)
     elif algo == 'harmonic':
         chosen = Harmonic()
     else:
         # Fallback for every unrecognized value.
         chosen = PageRank(damping_factor=damping_factor,
                           solver=solver,
                           n_iter=n_iter,
                           tol=tol)
     self.scorer = chosen
     self.undirected = undirected
예제 #7
0
    def get_triples_of_event(self, seed_vertex, topN=10):
        """
        Run personalized PageRank from a seed vertex and collect the triples
        that score highest for it.

        Every scored triple is also written to ``debug_logger`` in
        descending score order.

        :param seed_vertex: key of the single seed passed to PageRank
            (with weight 1)
        :param topN: maximum number of triples to return; capped at
            ``len(self.unique_triples)``
        :return: list of entries of ``self.unique_triples``, highest score first
        """
        limit = min(len(self.unique_triples), topN)

        ranker = PageRank()
        # Personalized PageRank seeded on this vertex only.
        scores = ranker.fit_transform(self.edge_weight, {seed_vertex: 1})
        ranking = np.argsort(-scores)  # indices by descending score

        triples_of_event = [self.unique_triples[idx] for idx in ranking[:limit]]

        debug_logger.debug("seed vertex: {}".format(seed_vertex))
        for idx in ranking:
            message = "weight: {}, triple: {}".format(
                scores[idx], self.unique_triples[idx].to_string())
            debug_logger.debug(message)

        return triples_of_event
예제 #8
0
 def __init__(self):
     """Run the parent initializer, then store a default ``PageRank`` instance."""
     super(RelavanceScore, self).__init__()
     # PageRank built with its default arguments, kept as `self.pagerank`.
     self.pagerank = PageRank()
예제 #9
0
from sknetwork.ranking import PageRank
from sknetwork.data import load_edge_list, house

# Load the `house` example graph from sknetwork.data as an adjacency matrix.
adjacency = house()

# PageRank with the 'push' solver, run without seeds.
pagerank = PageRank(solver='push')
scores = pagerank.fit_transform(adjacency)
print(scores)
"""
the result should be like: [0.17301832 0.22442742 0.1823948  0.18926552 0.23089394]
"""
# NOTE(review): from here on the script uses names that are not defined in
# the visible part of this file (`time`, `np`, `seeds`, `damping_factor`,
# `tol`, `push_pagerank`, `pages`, `print_highest_lowest_values`,
# `get_rang`); this looks like the tail of a separate benchmark snippet.
# Confirm where they come from.

# Timed section 1: direct call to push_pagerank.
time_start = time.time()
n = adjacency.shape[0]
# Row sums of the adjacency matrix (presumably out-degrees), cast to int32.
degrees = adjacency.dot(np.ones(n)).astype(np.int32)
# Transposed adjacency in CSR form.
rev_adjacency = adjacency.transpose().tocsr()

# CSR index arrays of both matrices, cast to int32 before being passed on.
indptr = adjacency.indptr.astype(np.int32)
indices = adjacency.indices.astype(np.int32)
rev_indptr = rev_adjacency.indptr.astype(np.int32)
rev_indices = rev_adjacency.indices.astype(np.int32)

scores = push_pagerank(n, degrees, indptr, indices, rev_indptr, rev_indices,
                       seeds.astype(np.float32), damping_factor, tol)
time_end = time.time()
print("Push Calculation time:", time_end - time_start, "seconds")
print("Result:")
print_highest_lowest_values(scores, pages)
print("Chess rang:")
# NOTE(review): 2597, 26634 and 229857 are hard-coded arguments to get_rang;
# presumably node ids of chess-related pages, confirm against the data set.
print(get_rang(scores, 2597))
print(get_rang(scores, 26634))
print(get_rang(scores, 229857))

print("------")

# Scikit Network
# Timed section 2: sknetwork PageRank with default arguments and the same
# `seeds`.
time_start = time.time()
pagerank = PageRank()
scores = pagerank.fit_transform(adjacency, seeds)
time_end = time.time()
print("Sknetwork power iteration time:", time_end - time_start, "seconds")
print("Result:")
print_highest_lowest_values(scores, pages)
예제 #11
0
from IPython.display import SVG
import numpy as np
from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.data import load_edge_list
from sknetwork.ranking import PageRank, BiPageRank
from sknetwork.visualization import svg_graph, svg_digraph, svg_bigraph

if __name__ == '__main__':
    # Load a directed graph from the edge list in 'test.csv', then print the
    # PageRank score of every node followed by the sum of the scores.
    graph = load_edge_list('test.csv', directed=True, fast_format=False)
    adjacency = graph.adjacency
    # position = graph.position
    # NOTE(review): 0.15 is an unusually low damping factor (0.85 is the
    # common default); confirm it is intended.
    pagerank = PageRank(damping_factor=0.15)
    scores = pagerank.fit_transform(adjacency)
    print(scores)
    # Named `total` so the builtin `sum` is not rebound at module level.
    total = sum(scores)
    print(total)
    # Disabled visualization, kept for reference:
    # image = svg_graph(adjacency, position, scores=np.log(scores))
    # SVG(image)