class ScikitNetworkScorer:
    """Rank nodes of a dict-shaped graph with a scikit-network algorithm.

    Wraps one of Diffusion / Closeness / Harmonic / PageRank (the default)
    behind a single ``score`` method.
    """

    def __init__(self, algo=None, damping_factor=0.85, solver='naive',
                 n_iter=10, tol=0, undirected=False, method='exact'):
        # Select the ranking algorithm; any unrecognized/None value of
        # ``algo`` falls back to PageRank (original behavior, kept).
        if algo == 'diffusion':
            self.scorer = Diffusion(n_iter=n_iter)
        elif algo == 'closeness':
            self.scorer = Closeness(method=method, tol=tol)
        elif algo == 'harmonic':
            self.scorer = Harmonic()
        else:
            self.scorer = PageRank(damping_factor=damping_factor,
                                   solver=solver, n_iter=n_iter, tol=tol)
        self.undirected = undirected

    def score(self, data):
        """Score every node of ``data``.

        Parameters
        ----------
        data : dict
            ``data['node']`` is a list of ``{'name': ..., 'weight': ...}``
            (weight optional); ``data['edge']`` is a list of
            ``{'node': (src_name, dst_name), 'weight': ...}``.

        Returns
        -------
        dict
            Mapping of node name to its score.
        """
        node_dict = {node['name']: i for i, node in enumerate(data['node'])}
        edges = np.array([(node_dict[edge['node'][0]],
                           node_dict[edge['node'][1]],
                           edge['weight']) for edge in data['edge']])
        adjacency = edgelist2adjacency(edges, undirected=self.undirected)
        # Use node weights as personalization seeds when present.
        # BUG FIX: the original tested ``data['node'][0].get('weight')``
        # for truthiness, which silently ignored all seeds whenever the
        # first node's weight happened to be 0. Test key presence instead.
        if 'weight' in data['node'][0]:
            seeds = np.array([node['weight'] for node in data['node']])
            scores = self.scorer.fit_transform(adjacency, seeds)
        else:
            scores = self.scorer.fit_transform(adjacency)
        return {k: scores[v] for k, v in node_dict.items()}
def pagerank_scikit(G, sim_mat, user_idx, alpha, beta):
    """Personalized PageRank for each node in ``user_idx``.

    The transition matrix is a convex blend of the row-normalized graph
    adjacency (weight ``beta``) and ``sim_mat`` (weight ``1 - beta``);
    ``alpha`` is the PageRank damping factor.

    Returns a ``(len(user_idx), n_nodes)`` array of PPR score vectors.
    """
    nodelist = G.nodes()
    # NOTE(review): nx.to_scipy_sparse_matrix was removed in networkx 3.0
    # (to_scipy_sparse_array) — confirm the pinned networkx version.
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight='weight', dtype=float)
    # Row-normalize M into a transition matrix (zero out-degree rows stay 0).
    # BUG FIX: scipy.array was deprecated and removed from SciPy; use numpy.
    S = np.asarray(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    # Blend graph transitions with the similarity matrix.
    M = beta * M + (1 - beta) * sim_mat
    pagerank = PageRank(damping_factor=alpha)
    ppr_mat = []
    # BUG FIX: the original int(len/3) could be 0 for < 3 users, raising
    # ZeroDivisionError on the modulo below; clamp to at least 1.
    print_every = max(1, len(user_idx) // 3)
    s = time.time()
    for k, i in enumerate(user_idx):
        seeds = {i: 1}  # restart distribution concentrated on user i
        pr = pagerank.fit_transform(M, seeds)
        ppr_mat.append(pr)
        # BUG FIX: progress was computed from the node id ``i`` rather than
        # the loop counter, giving wrong percentages for non-contiguous ids.
        if (k + 1) % print_every == 0:
            print('{}% {}sec'.format((k + 1) / len(user_idx) * 100,
                                     time.time() - s))
    return np.array(ppr_mat)
def compute_rank(self, file_name):
    """Compute PageRank over the stored destination graph and write SVGs.

    Builds the adjacency from the COO triplets held on ``self`` (values
    ``self.v`` at coordinates ``(self.b, self.a)``), writes a PageRank-scored
    graph SVG to ``file_name`` and a Paris dendrogram SVG to
    ``"dento_" + file_name``.

    NOTE(review): leftover debug ``print()`` calls (raw matrix, scores,
    filenames, node data) were removed from the original.
    """
    x = csr_matrix((self.v, (self.b, self.a)),
                   shape=(len(self.destin_idx), len(self.destin_idx)),
                   dtype=float)
    # Elementwise product with the transpose keeps only mutual edges,
    # yielding a symmetric adjacency.
    adjacency = x.multiply(x.transpose())
    pagerank = PageRank()
    scores = pagerank.fit_transform(adjacency)
    image = svg_graph(adjacency, names=self.destin_names, scores=scores,
                      display_node_weight=True, node_order=np.argsort(scores))
    with open(file_name, "w") as text_file:
        text_file.write(image)
    # Hierarchical clustering (Paris) rendered as a 5-cluster dendrogram.
    paris = Paris()
    dendrogram = paris.fit_transform(adjacency)
    image = svg_dendrogram(dendrogram, self.destin_names, n_clusters=5,
                           rotate=True)
    with open("dento_" + file_name, "w") as text_file:
        text_file.write(image)
class RelavanceScore(TorchModel):
    '''
    Calculate relavance score between countries and concepts
    Return a N*M matrix
    N: country num
    M: concept num
    '''

    def __init__(self):
        super(RelavanceScore, self).__init__()
        # PageRank instance reused by run() to weight papers.
        self.pagerank = PageRank()

    def run(self, adj, seeds, paper_id, country_id, concept_id, paper_country, paper_concept):
        # Rank all graph nodes with personalized PageRank seeded by `seeds`.
        pr_scores = self.pagerank.fit_transform(adj, seeds)  # pagerank scores
        # Paper nodes occupy the first len(paper_id) entries of the score
        # vector — assumes global ids are ordered papers, concepts,
        # countries (see the offset subtractions below); TODO confirm
        # against the code that builds `adj`.
        w_paper = pr_scores[0:len(paper_id)]
        w_paper /= w_paper.sum()  # normalize the paper weight
        # calculate the paper-county relavance score
        # Each paper spreads weight 1 uniformly over its countries; the
        # country column index is re-based past the paper+concept blocks.
        paper_country_edge = []
        country = set()
        for p in paper_country:
            for c in paper_country[p]:
                country.add(c)
                paper_country_edge.append((paper_id[p],
                    country_id[c] - (len(paper_id) + len(concept_id)),
                    1 / len(paper_country[p])))
        # add the countries without papers and set their edges to 0
        for c in country_id:
            if c not in country:
                paper_country_edge.append((paper_id[0],
                    country_id[c] - (len(paper_id) + len(concept_id)), 0))
        country_paper_mat = sknetwork.utils.edgelist2biadjacency(paper_country_edge).transpose()
        # calculate the paper-concept relavance score
        paper_concept_edge = []
        concept = set()
        for p in paper_country:
            # add the paper without concepts and set their edges to 0
            # NOTE(review): `concept_id[72]` looks like a hard-coded
            # placeholder concept used as a zero-weight sink — confirm 72
            # is always a valid key of `concept_id`.
            if p not in paper_concept:
                paper_concept_edge.append((paper_id[p],
                    concept_id[72] - (len(paper_id)), 0))
                continue
            for c in paper_concept[p]:
                # Edge weight: the paper's uniform share per concept,
                # scaled by its normalized PageRank weight.
                paper_concept_edge.append((paper_id[p],
                    concept_id[c] - (len(paper_id)),
                    1 / len(paper_concept[p]) * w_paper[paper_id[p]]))
                concept.add(c)
        # add the concepts not belonging to any papers and set their edges to 0
        for c in concept_id:
            if c not in concept:
                paper_concept_edge.append((paper_id[0],
                    concept_id[c] - (len(paper_id)), 0))
        paper_concept_mat = sknetwork.utils.edgelist2biadjacency(paper_concept_edge)
        # (country x paper) . (paper x concept) -> country x concept scores.
        country_concept = country_paper_mat.dot(paper_concept_mat)
        return country_concept
def get_triples_of_event(self, seed_vertex, topN=10):
    """Run personalized PageRank from ``seed_vertex`` and return the
    ``topN`` most relevant triples.

    The seed vertex gets all the restart probability; triples are ranked
    by their PageRank score in descending order.
    """
    # Never request more triples than exist.
    limit = topN if topN < len(self.unique_triples) else len(
        self.unique_triples)
    ranker = PageRank()
    # Personalized PageRank restarted at the single seed vertex.
    scores = ranker.fit_transform(self.edge_weight, {seed_vertex: 1})
    order = np.argsort(-scores)
    selected = [self.unique_triples[idx] for idx in order[:limit]]
    debug_logger.debug("seed vertex: {}".format(seed_vertex))
    for idx in order:
        debug_logger.debug("weight: {}, triple: {}".format(
            scores[idx], self.unique_triples[idx].to_string()))
    return selected
from sknetwork.ranking import PageRank
from sknetwork.data import load_edge_list, house

# Demo: PageRank scores of the toy "house" graph, computed with the
# push-based solver, printed to stdout.
graph_adjacency = house()
ranker = PageRank(solver='push')
node_scores = ranker.fit_transform(graph_adjacency)
print(node_scores)
"""
the result should be like:
[0.17301832 0.22442742 0.1823948 0.18926552 0.23089394]
"""