Beispiel #1
0
class ScikitNetworkScorer:
    """Score graph nodes with a selectable scikit-network ranking algorithm.

    ``algo`` selects the scorer: ``'diffusion'``, ``'closeness'`` or
    ``'harmonic'``. Any other value, including the default ``None``, selects
    PageRank. Each remaining keyword argument is forwarded only to the scorer
    constructors that take it; ``undirected`` is used when the adjacency
    matrix is built in :meth:`score`.
    """

    def __init__(self,
                 algo=None,
                 damping_factor=0.85,
                 solver='naive',
                 n_iter=10,
                 tol=0,
                 undirected=False,
                 method='exact'):
        # Factories are wrapped in lambdas so that only the selected scorer
        # class is looked up and instantiated.
        named_factories = (
            ('diffusion', lambda: Diffusion(n_iter=n_iter)),
            ('closeness', lambda: Closeness(method=method, tol=tol)),
            ('harmonic', lambda: Harmonic()),
        )
        for name, build in named_factories:
            if algo == name:
                self.scorer = build()
                break
        else:
            # No named algorithm matched: fall back to PageRank.
            self.scorer = PageRank(damping_factor=damping_factor,
                                   solver=solver,
                                   n_iter=n_iter,
                                   tol=tol)
        self.undirected = undirected

    def score(self, data):
        """Return a ``{node name: score}`` dict for the graph in ``data``.

        ``data['node']`` is a sequence of dicts with a ``'name'`` key and an
        optional ``'weight'`` key; ``data['edge']`` is a sequence of dicts
        with a ``'node'`` pair of node names and a ``'weight'``. When the
        first node has a truthy ``'weight'``, the weights of all nodes are
        passed to the scorer as seeds.
        """
        nodes = data['node']
        index_of = {}
        for position, node in enumerate(nodes):
            index_of[node['name']] = position

        triples = []
        for edge in data['edge']:
            triples.append((index_of[edge['node'][0]],
                            index_of[edge['node'][1]],
                            edge['weight']))
        adjacency = edgelist2adjacency(np.array(triples),
                                       undirected=self.undirected)

        fit_args = [adjacency]
        if nodes[0].get('weight'):
            fit_args.append(np.array([node['weight'] for node in nodes]))
        scores = self.scorer.fit_transform(*fit_args)

        return {name: scores[position] for name, position in index_of.items()}
Beispiel #2
0
def pagerank_scikit(G, sim_mat, user_idx, alpha, beta):
    """Compute one personalized PageRank vector per seed in ``user_idx``.

    The weighted adjacency matrix of ``G`` is row-normalised (rows with a
    zero sum are left untouched), blended with ``sim_mat`` as
    ``beta * M + (1 - beta) * sim_mat``, and PageRank is run on the result
    once per entry of ``user_idx`` with that entry as the only seed.

    Parameters
    ----------
    G:
        Graph accepted by ``nx.to_scipy_sparse_matrix``; edge weights are
        read from the ``'weight'`` attribute.
    sim_mat:
        Matrix added to the normalised adjacency; it must be addable to it.
    user_idx:
        Sized iterable of seed node indices.
    alpha:
        Damping factor passed to ``PageRank``.
    beta:
        Mixing coefficient between the normalised adjacency and ``sim_mat``.

    Returns
    -------
    numpy.ndarray
        The PageRank results stacked in the order of ``user_idx``.
    """
    nodelist = G.nodes()
    # NOTE(review): to_scipy_sparse_matrix no longer exists in NetworkX 3.x;
    # confirm the pinned NetworkX version before upgrading.
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=nodelist,
                                  weight='weight',
                                  dtype=float)
    # np.asarray replaces the deprecated scipy.array alias.
    row_sums = np.asarray(M.sum(axis=1)).flatten()
    nonzero = row_sums != 0
    row_sums[nonzero] = 1.0 / row_sums[nonzero]
    Q = scipy.sparse.spdiags(row_sums, 0, *M.shape, format='csr')
    M = Q * M
    M = beta * M + (1 - beta) * sim_mat

    pagerank = PageRank(damping_factor=alpha)

    total = len(user_idx)
    # At least 1, so fewer than three seeds cannot trigger a modulo by zero.
    print_every = max(1, total // 3)
    ppr_mat = []
    s = time.time()
    # Progress is tracked by the number of seeds processed, not by node id,
    # so the percentage is meaningful for arbitrary index lists.
    for done, i in enumerate(user_idx, start=1):
        ppr_mat.append(pagerank.fit_transform(M, {i: 1}))
        if done % print_every == 0:
            print('{}% {}sec'.format(done / total * 100, time.time() - s))

    return np.array(ppr_mat)
Beispiel #3
0
    def compute_rank(self, file_name):
        """Rank the nodes with PageRank, cluster them with Paris, write two SVGs.

        A square sparse matrix is built from ``self.v`` (values) at positions
        ``(self.b, self.a)``; the adjacency used for both algorithms is the
        element-wise product of that matrix with its transpose. The scored
        graph is written to ``file_name`` and the dendrogram to
        ``"dento_" + file_name``. Intermediate values are printed to stdout.
        """
        size = len(self.destin_idx)
        x = csr_matrix((self.v, (self.b, self.a)),
                       shape=(size, size),
                       dtype=float)
        print(x)
        adjacency = x.multiply(x.transpose())

        scores = PageRank().fit_transform(adjacency)
        graph_svg = svg_graph(adjacency,
                              names=self.destin_names,
                              scores=scores,
                              display_node_weight=True,
                              node_order=np.argsort(scores))
        with open(file_name, "w") as graph_file:
            print(file_name)
            print(scores)
            graph_file.write(graph_svg)

        print(self.v)
        print(self.destin_names)

        dendrogram = Paris().fit_transform(adjacency)
        dendrogram_svg = svg_dendrogram(dendrogram,
                                        self.destin_names,
                                        n_clusters=5,
                                        rotate=True)
        with open("dento_" + file_name, "w") as dendrogram_file:
            dendrogram_file.write(dendrogram_svg)
Beispiel #4
0
 def __init__(self,
              damping_factor: float = 0.85,
              solver: str = 'naive',
              n_iter: int = 10,
              tol: float = 0.,
              n_jobs: Optional[int] = None,
              verbose: bool = False):
     """Build the classifier around a ``PageRank`` ranking algorithm.

     ``damping_factor``, ``solver``, ``n_iter`` and ``tol`` configure the
     ``PageRank`` instance; ``n_jobs`` and ``verbose`` are handed to the
     parent class together with that instance.
     """
     super(PageRankClassifier, self).__init__(
         PageRank(damping_factor, solver, n_iter, tol), n_jobs, verbose)
Beispiel #5
0
 def __init__(self,
              algo=None,
              damping_factor=0.85,
              solver='naive',
              n_iter=10,
              tol=0,
              undirected=False,
              method='exact'):
     """Select and configure the scorer named by ``algo``.

     ``'diffusion'``, ``'closeness'`` and ``'harmonic'`` choose the matching
     scorer; any other value (including ``None``) chooses PageRank. Only
     the arguments relevant to the selected scorer are forwarded to it.
     """
     # Factories are wrapped in lambdas so that only the selected scorer
     # class is looked up and instantiated.
     named_factories = (
         ('diffusion', lambda: Diffusion(n_iter=n_iter)),
         ('closeness', lambda: Closeness(method=method, tol=tol)),
         ('harmonic', lambda: Harmonic()),
     )
     for name, build in named_factories:
         if algo == name:
             self.scorer = build()
             break
     else:
         # No named algorithm matched: fall back to PageRank.
         self.scorer = PageRank(damping_factor=damping_factor,
                                solver=solver,
                                n_iter=n_iter,
                                tol=tol)
     self.undirected = undirected
Beispiel #6
0
    def get_triples_of_event(self, seed_vertex, topN=10):
        """
        Starting from a seed vertex, run personalized PageRank and collect
        the triples most related to that seed.

        :param seed_vertex: index of the seed vertex; used as the only seed
        :param topN: maximum number of triples to return; capped at the
            number of entries in ``self.unique_triples``
        :return: entries of ``self.unique_triples`` in descending score order
        """
        # Equivalent to "topN if topN < len(...) else len(...)".
        limit = min(len(self.unique_triples), topN)

        # Run personalized PageRank for this seed vertex.
        scores = PageRank().fit_transform(self.edge_weight, {seed_vertex: 1})
        ranking = np.argsort(-scores)
        triples_of_event = [self.unique_triples[idx] for idx in ranking[:limit]]

        # Log the full ranking, not only the returned top entries.
        debug_logger.debug("seed vertex: {}".format(seed_vertex))
        for idx in ranking:
            message = "weight: {}, triple: {}".format(
                scores[idx], self.unique_triples[idx].to_string())
            debug_logger.debug(message)

        return triples_of_event
Beispiel #7
0
class RelavanceScore(TorchModel):
    '''
    Calculate relevance scores between countries and concepts.
    ``run`` returns the product of a country-by-paper matrix and a
    paper-by-concept matrix, described by the original author as N*M with
    N: country num
    M: concept num
    '''
    def __init__(self):
        super().__init__()
        self.pagerank = PageRank()

    def run(self, adj, seeds, paper_id, country_id, concept_id, paper_country, paper_concept):
        """Combine PageRank paper weights with paper-country/concept links.

        ``paper_id``, ``country_id`` and ``concept_id`` map keys to node
        indices; ``paper_country`` and ``paper_concept`` map a paper key to
        the countries / concepts attached to it.
        """
        n_papers = len(paper_id)
        # Offsets turn node indices into column indices. Presumably nodes
        # are numbered papers first, then concepts, then countries -- confirm
        # against the caller that builds the id maps.
        country_shift = n_papers + len(concept_id)
        concept_shift = n_papers

        # PageRank scores; the leading n_papers entries are normalised in
        # place so that they sum to one.
        pr_scores = self.pagerank.fit_transform(adj, seeds)
        paper_weight = pr_scores[:n_papers]
        paper_weight /= paper_weight.sum()

        # Paper -> country edges, each paper spread evenly over its countries.
        linked_countries = set()
        country_edges = []
        for paper in paper_country:
            for name in paper_country[paper]:
                linked_countries.add(name)
                country_edges.append((paper_id[paper],
                                      country_id[name] - country_shift,
                                      1 / len(paper_country[paper])))
        # Countries without any paper get a zero-weight edge so that they
        # still appear in the edge list.
        country_edges.extend(
            (paper_id[0], country_id[name] - country_shift, 0)
            for name in country_id if name not in linked_countries)
        country_paper_mat = sknetwork.utils.edgelist2biadjacency(country_edges).transpose()

        # Paper -> concept edges, scaled by the normalised paper weight.
        linked_concepts = set()
        concept_edges = []
        for paper in paper_country:
            if paper in paper_concept:
                for name in paper_concept[paper]:
                    concept_edges.append((paper_id[paper],
                                          concept_id[name] - concept_shift,
                                          1 / len(paper_concept[paper]) * paper_weight[paper_id[paper]]))
                    linked_concepts.add(name)
            else:
                # Paper without concepts: zero-weight placeholder edge.
                # NOTE(review): the key 72 is hard-coded in the original;
                # confirm it always exists in concept_id.
                concept_edges.append((paper_id[paper], concept_id[72] - concept_shift, 0))
        # Concepts attached to no paper get a zero-weight edge as well.
        concept_edges.extend(
            (paper_id[0], concept_id[name] - concept_shift, 0)
            for name in concept_id if name not in linked_concepts)
        paper_concept_mat = sknetwork.utils.edgelist2biadjacency(concept_edges)

        return country_paper_mat.dot(paper_concept_mat)
Beispiel #8
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Union[np.ndarray, dict]) -> 'MultiRank':
        """Compute personalized PageRank using each given label as a seed set.

        One PageRank run is performed per class; the per-class scores are
        stacked column-wise and each row with a positive sum is normalised
        to sum to one. The result is stored in ``self.membership_``.

        Parameters
        ----------
        adjacency:
            Adjacency matrix of the graph.
        seeds: Dict or ndarray,
            If dict, ``(key, val)`` indicates that node ``key`` has label ``val``.
            If ndarray, ``seeds[i] = val`` indicates that node ``i`` has label ``val``.
            Negative values are treated as no label.

        Returns
        -------
        self: :class:`MultiRank`

        """
        if isinstance(self, BiMultiRank):
            pr = BiPageRank(self.damping_factor, self.solver)
        else:
            pr = PageRank(self.damping_factor, self.solver)

        seeds_labels = check_seeds(seeds, adjacency)
        classes, n_classes = check_labels(seeds_labels)

        n: int = adjacency.shape[0]
        # One 0/1 indicator vector per class, marking the nodes seeded with it.
        personalizations = [np.array(seeds_labels == label).astype(int)
                            for label in classes]

        if self.n_jobs != 1:
            # Each worker receives one personalization vector as the second
            # positional argument of fit_transform.
            local_function = partial(pr.fit_transform, adjacency)
            with Pool(self.n_jobs) as pool:
                membership = np.array(
                    pool.map(local_function, personalizations))
            # Keep only the first n rows, as the serial branch does, so both
            # branches always produce an (n, n_classes) membership matrix.
            membership = membership.T[:n]
        else:
            membership = np.zeros((n, n_classes))
            for i in range(n_classes):
                membership[:, i] = pr.fit_transform(
                    adjacency, personalization=personalizations[i])[:n]

        # Row-normalise, leaving rows with a non-positive sum untouched.
        norm = np.sum(membership, axis=1)
        membership[norm > 0] /= norm[norm > 0, np.newaxis]

        self.membership_ = membership
        return self
Beispiel #9
0
def pagerank_scikit(G):
    """Compute a personalized PageRank vector for every node of ``G``.

    The weighted adjacency matrix of ``G`` is row-normalised (rows with a
    zero sum are left untouched) and PageRank is run once per row index,
    using that index as the only seed.

    Parameters
    ----------
    G:
        Graph accepted by ``nx.to_scipy_sparse_matrix``; edge weights are
        read from the ``'weight'`` attribute.

    Returns
    -------
    numpy.ndarray
        The PageRank results stacked in row-index order.
    """
    # NOTE(review): to_scipy_sparse_matrix no longer exists in NetworkX 3.x;
    # confirm the pinned NetworkX version before upgrading.
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=G.nodes(),
                                  weight='weight',
                                  dtype=float)
    # np.asarray replaces the deprecated scipy.array alias.
    row_sums = np.asarray(M.sum(axis=1)).flatten()
    nonzero = row_sums != 0
    row_sums[nonzero] = 1.0 / row_sums[nonzero]
    Q = scipy.sparse.spdiags(row_sums, 0, *M.shape, format='csr')
    M = Q * M
    pagerank = PageRank()

    ppr_mat = [pagerank.fit_transform(M, {i: 1}) for i in range(M.shape[0])]
    return np.array(ppr_mat)
Beispiel #10
0
 def __init__(self):
     """Initialise the parent class and create the PageRank ranker."""
     super().__init__()
     self.pagerank = PageRank()
Beispiel #11
0
from sknetwork.ranking import PageRank
from sknetwork.data import load_edge_list, house

# Demo: rank the nodes of the bundled `house` example graph with PageRank.
# `house()` is used here as the adjacency matrix passed to the ranker.
adjacency = house()

# Use the 'push' solver, then compute and print one score per node.
pagerank = PageRank(solver='push')
scores = pagerank.fit_transform(adjacency)
print(scores)
"""
the result should be like: [0.17301832 0.22442742 0.1823948  0.18926552 0.23089394]
"""
# Benchmark: time a custom push-based PageRank against scikit-network's
# PageRank on the same adjacency matrix and seeds.
# NOTE(review): `time`, `np`, `seeds`, `damping_factor`, `tol`, `pages`,
# `push_pagerank`, `print_highest_lowest_values` and `get_rang` are not
# defined in the visible part of this file -- presumably they are imported
# or defined elsewhere; confirm before running.
time_start = time.time()
n = adjacency.shape[0]
# Row sums of the adjacency matrix (A . 1), cast to 32-bit integers.
degrees = adjacency.dot(np.ones(n)).astype(np.int32)
# Transposed graph, converted to CSR so its index arrays can be read below.
rev_adjacency = adjacency.transpose().tocsr()

# CSR index arrays of the graph and of its transpose, as 32-bit integers.
indptr = adjacency.indptr.astype(np.int32)
indices = adjacency.indices.astype(np.int32)
rev_indptr = rev_adjacency.indptr.astype(np.int32)
rev_indices = rev_adjacency.indices.astype(np.int32)

# Seeds are passed as float32; the timed span includes the array
# preparation above as well as this call.
scores = push_pagerank(n, degrees, indptr, indices, rev_indptr, rev_indices,
                       seeds.astype(np.float32), damping_factor, tol)
time_end = time.time()
print("Push Calculation time:", time_end - time_start, "seconds")
print("Result:")
print_highest_lowest_values(scores, pages)
# Presumably the ranks of three specific node ids of interest -- verify what
# 2597, 26634 and 229857 refer to in the dataset.
print("Chess rang:")
print(get_rang(scores, 2597))
print(get_rang(scores, 26634))
print(get_rang(scores, 229857))

print("------")

# Scikit Network
# Same adjacency and seeds, using PageRank with its default settings.
time_start = time.time()
pagerank = PageRank()
scores = pagerank.fit_transform(adjacency, seeds)
time_end = time.time()
print("Sknetwork power iteration time:", time_end - time_start, "seconds")
print("Result:")
print_highest_lowest_values(scores, pages)
Beispiel #13
0
from IPython.display import SVG
import numpy as np
from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.data import load_edge_list
from sknetwork.ranking import PageRank, BiPageRank
from sknetwork.visualization import svg_graph, svg_digraph, svg_bigraph

if __name__ == '__main__':
    # Load a directed graph from an edge-list file, rank its nodes with
    # PageRank, and print the scores followed by their sum.
    graph = load_edge_list('test.csv', directed=True, fast_format=False)
    adjacency = graph.adjacency
    # Disabled visualisation step: position = graph.position
    pagerank = PageRank(damping_factor=0.15)
    scores = pagerank.fit_transform(adjacency)
    print(scores)
    # Stored under its own name so the built-in sum() is not shadowed.
    total_score = sum(scores)
    print(total_score)
    # Disabled visualisation steps:
    #   image = svg_graph(adjacency, position, scores=np.log(scores))
    #   SVG(image)