def rebuild():
    """Reset the whole index and re-index every cached publication.

    Walks every ``*.json`` file under ``CACHE_DIR / 'publication'``, parses
    it, and indexes it through ``es.index_publication``. After each file is
    parsed, ``"<current> / <total>"`` is written to
    ``CACHE_DIR / 'indexer.progress'``. Files whose parsed content is falsy
    (e.g. ``null`` or ``{}``) still count towards the progress but are not
    indexed. The index is refreshed once all files have been processed.
    """
    es.reset_whole_index()

    publication_files = list((CACHE_DIR / 'publication').walkfiles('*.json'))
    total = len(publication_files)
    progress_file = CACHE_DIR / 'indexer.progress'

    for position, path in enumerate(publication_files, start=1):
        # Parse first: a malformed file raises before the progress moves on.
        record = json.loads(path.text())
        progress_file.write_text('%d / %d' % (position, total))

        if record:
            es.index_publication(
                id=record['id'],
                title=record['title'],
                abstract=record['abstract'],
                authors=record['authors'],
                cited_ids=record['citations'],
                reference_ids=record['references'],
            )

    es.refresh()
def rebuild():
    """Reset the whole index and re-index every cached publication.

    Walks every ``*.json`` file under ``CACHE_DIR / 'publication'``, parses
    it, and indexes it through ``es.index_publication``. After each file is
    parsed, ``"<current> / <total>"`` is written to
    ``CACHE_DIR / 'indexer.progress'``. Files whose parsed content is falsy
    (e.g. ``null`` or ``{}``) still count towards the progress but are not
    indexed. The index is refreshed once all files have been processed.
    """
    es.reset_whole_index()

    publication_files = list((CACHE_DIR / 'publication').walkfiles('*.json'))
    total = len(publication_files)
    progress_file = CACHE_DIR / 'indexer.progress'

    for position, path in enumerate(publication_files, start=1):
        # Parse first: a malformed file raises before the progress moves on.
        record = json.loads(path.text())
        progress_file.write_text('%d / %d' % (position, total))

        if record:
            es.index_publication(
                id=record['id'],
                title=record['title'],
                abstract=record['abstract'],
                authors=record['authors'],
                cited_ids=record['citations'],
                reference_ids=record['references'],
            )

    es.refresh()
def _get_rank(cites):
    """
    Compute page ranks from a citation matrix by repeated matrix squaring.

    Every row of ``cites`` is blended with a uniform term (``alpha = 0.1``)
    and rescaled so that it sums to 1. The resulting matrix is then squared
    50 times; after each squaring a percentage is written to
    ``CACHE_DIR / 'pagerank.progress'``.

    :param cites: np.array presenting citation of i -> j; assumed to be
        square (n x n) -- TODO confirm against the caller
    :return: array of page ranks, computed as an all-ones (n, n) matrix
        multiplied by the repeatedly squared matrix
    indices are assumed 0..N
    """
    alpha = 0.1
    n = cites.shape[0]
    transition = np.array(cites, dtype=np.float64)

    # Uniform term mixed into every row; built once and reused.
    uniform = np.ones(n) * alpha
    for row in range(n):
        transition[row] = transition[row] * (1 - alpha) + uniform
        transition[row] /= transition[row].sum()

    start = np.ones((n, n))
    for step in range(1, 51):
        transition = transition.dot(transition)
        # step * 2 runs 2, 4, ..., 100.
        (CACHE_DIR / 'pagerank.progress').write_text('{}%'.format(step * 2))

    # NOTE(review): the flattened source does not show whether this product
    # belongs inside the loop above; it is applied once after it here --
    # confirm against the original layout.
    return start.dot(transition)


if __name__ == '__main__':
    # NOTE(review): only _get_rank is defined in the visible source; confirm
    # that get_rank exists elsewhere in this file.
    pubs = es._get_all_publications()
    ranks = get_rank(pubs)
    es.update_ranks(pubs, ranks)
    es.refresh()
    # Spot check: print the stored rank of one publication.
    print(es._get_all_publications()[20]['rank'])
def _get_rank(cites):
    """
    Compute page ranks from a citation matrix by repeated matrix squaring.

    Every row of ``cites`` is blended with a uniform term (``alpha = 0.1``)
    and rescaled so that it sums to 1. The resulting matrix is then squared
    50 times; after each squaring a percentage is written to
    ``CACHE_DIR / "pagerank.progress"``.

    :param cites: np.array presenting citation of i -> j; assumed to be
        square (n x n) -- TODO confirm against the caller
    :return: array of page ranks, computed as an all-ones (n, n) matrix
        multiplied by the repeatedly squared matrix
    indices are assumed 0..N
    """
    alpha = 0.1
    n = cites.shape[0]
    transition = np.array(cites, dtype=np.float64)

    # Uniform term mixed into every row; built once and reused.
    uniform = np.ones(n) * alpha
    for row in range(n):
        transition[row] = transition[row] * (1 - alpha) + uniform
        transition[row] /= transition[row].sum()

    start = np.ones((n, n))
    for step in range(1, 51):
        transition = transition.dot(transition)
        # step * 2 runs 2, 4, ..., 100.
        (CACHE_DIR / "pagerank.progress").write_text("{}%".format(step * 2))

    # NOTE(review): the flattened source does not show whether this product
    # belongs inside the loop above; it is applied once after it here --
    # confirm against the original layout.
    return start.dot(transition)


if __name__ == "__main__":
    # NOTE(review): only _get_rank is defined in the visible source; confirm
    # that get_rank exists elsewhere in this file.
    pubs = es._get_all_publications()
    ranks = get_rank(pubs)
    es.update_ranks(pubs, ranks)
    es.refresh()
    # Spot check: print the stored rank of one publication.
    print(es._get_all_publications()[20]["rank"])