Esempio n. 1
0
 def _get_reddit_posts_as_documents(self, n, keyword="data"):
     """Fetch hot posts from a subreddit and wrap them as documents.

     Args:
         n: Value passed as ``limit`` to the subreddit's hot listing.
         keyword: Name of the subreddit to read from.

     Returns:
         A list with one ``Document.factory("Reddit", post)`` result per
         post of the listing.
     """
     hot_listing = reddit.subreddit(keyword).hot(limit=n)
     return [Document.factory("Reddit", submission) for submission in hot_listing]
Esempio n. 2
0
 def _get_arxiv_publications_as_documents(self, n, keyword="data"):
     """Fetch arXiv publications matching `keyword` and wrap them as documents.

     Queries the arXiv export API with ``search_query=all:<keyword>``,
     parses the response with ``xmltodict`` and passes every feed entry to
     ``Document.factory("Arxiv", entry)``.

     Args:
         n: Value sent as ``max_results`` in the query.
         keyword: Search term. It is percent-encoded before being placed in
             the URL; ``+`` and ``:`` are kept as-is so callers that already
             join words with ``+`` keep working.

     Returns:
         A list with one ``Document.factory`` result per feed entry; empty
         when the feed contains no entry.
     """
     # Local import so this method does not rely on a file-level import.
     from urllib.parse import quote

     # Encode the keyword: a raw space, '&' or '#' would otherwise break
     # the request URL or its query string.
     url = ('http://export.arxiv.org/api/query?search_query=all:'
            + quote(keyword, safe='+:') + '&start=0&max_results=' + str(n))
     # Close the HTTP response as soon as the payload has been read.
     with urllib.request.urlopen(url) as response:
         data = xmltodict.parse(response.read())
     # xmltodict builds a list only for repeated elements: a single <entry>
     # comes back as one mapping and a feed without <entry> has no such key,
     # whatever the requested n. Normalise on the actual shape instead.
     entries = data["feed"].get("entry", [])
     if not isinstance(entries, list):
         entries = [entries]
     return [Document.factory("Arxiv", entry) for entry in entries]