class ScikitNetworkScorer:
    """Rank graph nodes with a scikit-network centrality algorithm.

    The algorithm is chosen at construction time ('diffusion', 'closeness',
    'harmonic', or PageRank by default); `score` then turns an edge-list
    dictionary into an adjacency matrix and returns a name -> score mapping.
    """

    def __init__(self, algo=None, damping_factor=0.85, solver='naive', n_iter=10, tol=0, undirected=False, method='exact'):
        """Select and configure the underlying scikit-network scorer.

        Any unrecognized (or None) `algo` falls back to PageRank.
        """
        if algo == 'diffusion':
            chosen = Diffusion(n_iter=n_iter)
        elif algo == 'closeness':
            chosen = Closeness(method=method, tol=tol)
        elif algo == 'harmonic':
            chosen = Harmonic()
        else:
            chosen = PageRank(damping_factor=damping_factor, solver=solver, n_iter=n_iter, tol=tol)
        self.scorer = chosen
        self.undirected = undirected

    def score(self, data):
        """Score every node in `data` and return {node name: score}.

        `data` holds 'node' entries (dicts with a 'name' and optionally a
        'weight' used as a personalization seed) and 'edge' entries (dicts
        with a 2-element 'node' pair and a 'weight').
        """
        # Map each node name to its positional index in the node list.
        index_of = {}
        for position, node in enumerate(data['node']):
            index_of[node['name']] = position

        # Build a (source, target, weight) edge list in index space.
        edge_rows = []
        for edge in data['edge']:
            source_name, target_name = edge['node'][0], edge['node'][1]
            edge_rows.append((index_of[source_name], index_of[target_name], edge['weight']))
        adjacency = edgelist2adjacency(np.array(edge_rows), undirected=self.undirected)

        # If the first node carries a weight, treat node weights as seeds
        # (personalized ranking); otherwise run the plain algorithm.
        if data['node'][0].get('weight'):
            seeds = np.array([node['weight'] for node in data['node']])
            scores = self.scorer.fit_transform(adjacency, seeds)
        else:
            scores = self.scorer.fit_transform(adjacency)
        return {name: scores[idx] for name, idx in index_of.items()}
def pagerank_scikit(G, sim_mat, user_idx, alpha, beta):
    """Compute one personalized-PageRank vector per user.

    The graph's row-normalized transition matrix is blended with an external
    similarity matrix, ``M = beta * P + (1 - beta) * sim_mat``, and a
    personalized PageRank (damping factor ``alpha``) is run once for each
    node index in ``user_idx``.

    Parameters
    ----------
    G : networkx graph; edge attribute 'weight' is used.
    sim_mat : matrix with the same shape as G's adjacency, blended in with
        weight ``1 - beta``.
    user_idx : iterable of node indices used as restart seeds.
    alpha : PageRank damping factor.
    beta : blend weight between the transition matrix and ``sim_mat``.

    Returns
    -------
    np.ndarray of shape (len(user_idx), n_nodes), one PPR vector per user.
    """
    nodelist = G.nodes()
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight='weight', dtype=float)
    # Row-normalize into a transition matrix. NOTE: scipy.array was removed in
    # SciPy 1.9 (deprecated since 1.3); np.asarray is the supported spelling.
    S = np.asarray(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    M = beta * M + (1 - beta) * sim_mat
    pagerank = PageRank(damping_factor=alpha)
    ppr_mat = []
    # max(1, ...) guards against ZeroDivisionError when len(user_idx) < 3.
    print_every = max(1, len(user_idx) // 3)
    s = time.time()
    for pos, i in enumerate(user_idx):
        pr = pagerank.fit_transform(M, {i: 1})
        ppr_mat.append(pr)
        # Progress is keyed on the loop position, not the node id: the ids in
        # user_idx need not be contiguous or start at zero.
        if (pos + 1) % print_every == 0:
            print('{}% {}sec'.format((pos + 1) / len(user_idx) * 100, time.time() - s))
    return np.array(ppr_mat)
def compute_rank(self, file_name):
    """Render PageRank scores and a Paris dendrogram of the destination graph.

    Builds a symmetric adjacency from the instance's COO-style triples
    (self.v values at (self.b, self.a) positions), writes an SVG of the
    scored graph to `file_name`, and an SVG dendrogram to "dento_" + file_name.
    """
    size = len(self.destin_idx)
    weight_matrix = csr_matrix((self.v, (self.b, self.a)), shape=(size, size), dtype=float)
    print(weight_matrix)
    # Elementwise product with the transpose keeps only reciprocal edges,
    # yielding a symmetric adjacency.
    adjacency = weight_matrix.multiply(weight_matrix.transpose())

    ranker = PageRank()
    scores = ranker.fit_transform(adjacency)
    graph_svg = svg_graph(adjacency, names=self.destin_names, scores=scores,
                          display_node_weight=True, node_order=np.argsort(scores))
    with open(file_name, "w") as text_file:
        print(file_name)
        print(scores)
        text_file.write(graph_svg)
    print(self.v)
    print(self.destin_names)

    # Hierarchical clustering of the same adjacency, rendered as a dendrogram.
    clusterer = Paris()
    dendrogram = clusterer.fit_transform(adjacency)
    dendro_svg = svg_dendrogram(dendrogram, self.destin_names, n_clusters=5, rotate=True)
    with open("dento_" + file_name, "w") as text_file:
        text_file.write(dendro_svg)
def __init__(self, damping_factor: float = 0.85, solver: str = 'naive', n_iter: int = 10, tol: float = 0., n_jobs: Optional[int] = None, verbose: bool = False):
    """Build a classifier backed by a configured PageRank ranker.

    The PageRank parameters (damping_factor, solver, n_iter, tol) configure
    the underlying ranking algorithm; n_jobs and verbose are forwarded to
    the parent classifier.
    """
    ranker = PageRank(damping_factor, solver, n_iter, tol)
    super(PageRankClassifier, self).__init__(ranker, n_jobs, verbose)
def __init__(self, algo=None, damping_factor=0.85, solver='naive', n_iter=10, tol=0, undirected=False, method='exact'):
    """Pick the scikit-network ranking algorithm named by `algo`.

    Recognized values are 'diffusion', 'closeness' and 'harmonic'; anything
    else (including None) falls back to PageRank. `undirected` is stored for
    later use when the adjacency is built.
    """
    if algo == 'diffusion':
        selected = Diffusion(n_iter=n_iter)
    elif algo == 'closeness':
        selected = Closeness(method=method, tol=tol)
    elif algo == 'harmonic':
        selected = Harmonic()
    else:
        selected = PageRank(damping_factor=damping_factor, solver=solver, n_iter=n_iter, tol=tol)
    self.scorer = selected
    self.undirected = undirected
def get_triples_of_event(self, seed_vertex, topN=10):
    """Run personalized PageRank from `seed_vertex` and return top triples.

    First the seed vertex is fixed, then personalized PageRank over
    `self.edge_weight` ranks all triples by relevance to that seed; the
    `topN` highest-scoring entries of `self.unique_triples` are returned
    (capped at the number of available triples).
    """
    limit = min(topN, len(self.unique_triples))
    ranker = PageRank()
    # Personalized PageRank restarting at the single seed vertex.
    scores = ranker.fit_transform(self.edge_weight, {seed_vertex: 1})
    order = np.argsort(-scores)  # descending by score
    triples_of_event = [self.unique_triples[idx] for idx in order[:limit]]

    debug_logger.debug("seed vertex: {}".format(seed_vertex))
    for idx in order:
        debug_logger.debug("weight: {}, triple: {}".format(
            scores[idx], self.unique_triples[idx].to_string()))
    return triples_of_event
class RelavanceScore(TorchModel):
    '''
    Calculate relavance score between countries and concepts.
    Returns an N*M matrix
    N: country num
    M: concept num
    '''
    def __init__(self):
        super(RelavanceScore, self).__init__()
        # Single PageRank instance reused by every call to run().
        self.pagerank = PageRank()

    def run(self, adj, seeds, paper_id, country_id, concept_id, paper_country, paper_concept):
        '''
        Build the country-concept relevance matrix.

        Papers are first weighted by personalized PageRank over `adj` (with
        `seeds`), then a country-paper biadjacency and a paper-concept
        biadjacency are assembled from the mapping dicts and multiplied.

        NOTE(review): ids appear to be laid out as papers first, then concepts,
        then countries — `country_id[c] - (len(paper_id) + len(concept_id))`
        and `concept_id[c] - len(paper_id)` rebase global ids to local column
        indices. Confirm this layout against the id-assignment code.
        '''
        pr_scores = self.pagerank.fit_transform(adj, seeds)  # pagerank scores
        # Paper weights are the first len(paper_id) PageRank scores.
        w_paper = pr_scores[0:len(paper_id)]
        w_paper /= w_paper.sum()  # normalize the paper weight
        # calculate the paper-country relevance score
        paper_country_edge = []
        country = set()
        for p in paper_country:
            for c in paper_country[p]:
                country.add(c)
                # Each paper spreads unit mass evenly over its countries.
                paper_country_edge.append((paper_id[p], \
                    country_id[c] - (len(paper_id) + len(concept_id)), 1 / len(paper_country[p])))
        # add the countries without papers and set their edges to 0
        # (zero-weight edges keep every country column present in the matrix)
        for c in country_id:
            if c not in country:
                paper_country_edge.append((paper_id[0], \
                    country_id[c] - (len(paper_id) + len(concept_id)), 0))
        country_paper_mat = sknetwork.utils.edgelist2biadjacency(paper_country_edge).transpose()
        # calculate the paper-concept relevance score
        paper_concept_edge = []
        concept = set()
        for p in paper_country:
            # add the paper without concepts and set their edges to 0
            if p not in paper_concept:
                # NOTE(review): concept_id[72] is a magic key used as a dummy
                # zero-weight column for concept-less papers — confirm that
                # key 72 always exists in concept_id.
                paper_concept_edge.append((paper_id[p], concept_id[72] - (len(paper_id)), 0))
                continue
            for c in paper_concept[p]:
                # Edge weight: even split over the paper's concepts, scaled by
                # the paper's normalized PageRank weight.
                paper_concept_edge.append((paper_id[p], concept_id[c] - (len(paper_id)), \
                    1 / len(paper_concept[p]) * w_paper[paper_id[p]]))
                concept.add(c)
        # add the concepts not belonging to any papers and set their edges to 0
        for c in concept_id:
            if c not in concept:
                paper_concept_edge.append((paper_id[0], concept_id[c] - (len(paper_id)), 0))
        paper_concept_mat = sknetwork.utils.edgelist2biadjacency(paper_concept_edge)
        # (country x paper) . (paper x concept) -> country x concept relevance.
        country_concept = country_paper_mat.dot(paper_concept_mat)
        return country_concept
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) -> 'MultiRank':
    """Compute personalized PageRank using each given labels as seed set.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.
    seeds:
        Dict or ndarray,
        If dict, ``(key, val)`` indicates that node ``key`` has label ``val``.
        If ndarray, ``seeds[i] = val`` indicates that node ``i`` has label ``val``.
        Negative values are treated as no label.

    Returns
    -------
    self: :class:`MultiRank`
    """
    # Bipartite subclass gets the bipartite PageRank variant.
    if isinstance(self, BiMultiRank):
        pr = BiPageRank(self.damping_factor, self.solver)
    else:
        pr = PageRank(self.damping_factor, self.solver)
    seeds_labels = check_seeds(seeds, adjacency)
    classes, n_classes = check_labels(seeds_labels)
    n: int = adjacency.shape[0]

    # One binary personalization vector per label: 1 on that label's seed
    # nodes, 0 elsewhere.
    personalizations = []
    for label in classes:
        personalization = np.array(seeds_labels == label).astype(int)
        personalizations.append(personalization)

    if self.n_jobs != 1:
        # Run one PageRank per label in parallel; pool.map preserves the
        # label order, and the stacked rows are transposed to (n, n_classes).
        local_function = partial(pr.fit_transform, adjacency)
        with Pool(self.n_jobs) as pool:
            membership = np.array(pool.map(local_function, personalizations))
        membership = membership.T
    else:
        membership = np.zeros((n, n_classes))
        for i in range(n_classes):
            # [:n] truncates to the row nodes (relevant for the bipartite case).
            membership[:, i] = pr.fit_transform(adjacency, personalization=personalizations[i])[:n]

    # Row-normalize so each node's label scores sum to 1; all-zero rows are
    # left untouched to avoid division by zero.
    norm = np.sum(membership, axis=1)
    membership[norm > 0] /= norm[norm > 0, np.newaxis]
    self.membership_ = membership
    return self
def pagerank_scikit(G):
    """Compute the full personalized-PageRank matrix of graph G.

    Row-normalizes G's weighted adjacency into a transition matrix, then runs
    one personalized PageRank per node (restart seed {i: 1}).

    Parameters
    ----------
    G : networkx graph; edge attribute 'weight' is used.

    Returns
    -------
    np.ndarray of shape (n_nodes, n_nodes): row i is the PPR vector seeded
    at node i.
    """
    M = nx.to_scipy_sparse_matrix(G, nodelist=G.nodes(), weight='weight', dtype=float)
    # Row-normalize into a transition matrix. NOTE: scipy.array was removed in
    # SciPy 1.9 (deprecated since 1.3); np.asarray is the supported spelling.
    S = np.asarray(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    pagerank = PageRank()
    ppr_mat = []
    for i in range(M.shape[0]):
        pr = pagerank.fit_transform(M, {i: 1})
        ppr_mat.append(pr)
    return np.array(ppr_mat)
def __init__(self):
    """Initialize the parent model and create the shared PageRank ranker."""
    super(RelavanceScore, self).__init__()
    # Reused by run() for every relevance computation.
    self.pagerank = PageRank()
from sknetwork.ranking import PageRank
from sknetwork.data import load_edge_list, house

# Minimal demo: PageRank with the 'push' solver on the toy "house" graph.
graph_adjacency = house()
ranker = PageRank(solver='push')
node_scores = ranker.fit_transform(graph_adjacency)
print(node_scores)
# Expected output, approximately:
# [0.17301832 0.22442742 0.1823948  0.18926552 0.23089394]
# --- Benchmark 1: custom push-based PageRank --------------------------------
time_start = time.time()
n = adjacency.shape[0]
# Row sums of the adjacency = per-node degree (dot with a ones vector).
degrees = adjacency.dot(np.ones(n)).astype(np.int32)
# CSR of the transposed graph gives fast access to in-neighbors.
rev_adjacency = adjacency.transpose().tocsr()
# Raw CSR arrays (int32) for both directions, as expected by push_pagerank.
indptr = adjacency.indptr.astype(np.int32)
indices = adjacency.indices.astype(np.int32)
rev_indptr = rev_adjacency.indptr.astype(np.int32)
rev_indices = rev_adjacency.indices.astype(np.int32)
scores = push_pagerank(n, degrees, indptr, indices, rev_indptr, rev_indices, seeds.astype(np.float32), damping_factor, tol)
time_end = time.time()
print("Push Calculation time:", time_end - time_start, "seconds")
print("Result:")
print_highest_lowest_values(scores, pages)
print("Chess rang:")
# NOTE(review): 2597, 26634 and 229857 look like hard-coded page ids of
# chess-related pages in this dataset — confirm against the page table.
print(get_rang(scores, 2597))
print(get_rang(scores, 26634))
print(get_rang(scores, 229857))
print("------")

# --- Benchmark 2: scikit-network's power-iteration PageRank -----------------
# Same adjacency and seeds, for a timing comparison against the push variant.
time_start = time.time()
pagerank = PageRank()
scores = pagerank.fit_transform(adjacency, seeds)
time_end = time.time()
print("Sknetwork power iteration time:", time_end - time_start, "seconds")
print("Result:")
print_highest_lowest_values(scores, pages)
from IPython.display import SVG
import numpy as np
from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.data import load_edge_list
from sknetwork.ranking import PageRank, BiPageRank
from sknetwork.visualization import svg_graph, svg_digraph, svg_bigraph

if __name__ == '__main__':
    # Load a directed graph from an on-disk edge list.
    graph = load_edge_list('test.csv', directed=True, fast_format=False)
    adjacency = graph.adjacency
    # position = graph.position  # layout coordinates, only needed for drawing

    # NOTE(review): damping_factor=0.15 is unusually low (0.85 is customary);
    # confirm this is intentional.
    pagerank = PageRank(damping_factor=0.15)
    scores = pagerank.fit_transform(adjacency)
    print(scores)

    # Use a non-shadowing name: `sum = sum(...)` clobbers the builtin and
    # breaks any later call to sum() in this scope.
    total = sum(scores)
    print(total)

    # image = svg_graph(adjacency, position, scores=np.log(scores))
    # SVG(image)