def get_langlinks(self):
    """Fetch the inter-language links recorded for the current page.

    A single ``prop=langlinks`` query (``lllimit`` of 500) is sent for
    ``self.title`` through ``API(self.lang)``. Only that one response is
    read; no continuation is followed.

    Returns
    -------
    langlinks : list
        The entries stored under the ``"langlinks"`` key of the first page
        of the response, or an empty list when the page has no such key.
        NOTE(review): presumably one entry per translation of the page --
        confirm the entry format against the MediaWiki API.

    todo: put a link to a page with the list of languages
    """
    client = API(self.lang)

    query = {
        "format": "json",
        "action": "query",
        "titles": self.title,
        "prop": "langlinks",
        "lllimit": 500,
    }
    response = client.get(query)

    # Only one title is requested, so the first page entry is the one we want.
    pages = response["query"]["pages"]
    first_page = pages[list(pages.keys())[0]]

    if "langlinks" not in first_page:
        return []
    return list(first_page["langlinks"])
def fetch_info(self, title, opt_params=None, lang="en"):
    """Query basic information about a page and store it on the instance.

    Parameters
    ----------
    title : string
        Title of the page to look up.
    opt_params : dict, optional
        Extra query parameters merged over the base query. When omitted,
        ``{"prop": "info", "inprop": "url"}`` is used.
    lang : string, optional
        Language passed to ``API``; defaults to ``"en"``.

    Returns
    -------
    r
        The response returned by ``api.get``, unchanged.

    Raises
    ------
    KeyError
        When the first page of the response has no ``"title"`` or
        ``"fullurl"`` entry (e.g. ``opt_params`` was overridden without
        ``inprop=url``).

    Side effects
    ------------
    Sets ``self.page_id``, ``self.title``, ``self.lang`` and ``self.url``
    and merges the page entry into ``self.data``.
    """
    # A None sentinel avoids sharing one mutable dict between every call.
    if opt_params is None:
        opt_params = {"prop": "info", "inprop": "url"}

    api = API(lang)

    params = {
        "format": "json",
        "action": "query",
        "titles": u"" + title,
    }
    params.update(opt_params)

    r = api.get(params)

    pages = r["query"]["pages"]

    # Only one title is requested: the first key is that page's id.
    self.page_id = list(pages.keys())[0]
    page = pages[self.page_id]

    self.title = page["title"]
    self.lang = lang
    self.url = page["fullurl"]

    self.data.update(page)

    return r
def get_diff_full(self, rev_id=""):
    """Return the complete response of a diff request for the page.

    The query asks for ``info|revisions`` of ``self.title`` with
    ``rvdiffto`` set to ``"prev"``, following redirects.

    Parameters
    ----------
    rev_id : string, optional
        Revision to start from. When it is anything other than the empty
        string, the query is additionally restricted with ``rvlimit=1`` and
        ``rvstartid=rev_id``. When left empty, no revision restriction is
        added (presumably the API then diffs the current revision against
        its predecessor -- confirm against the MediaWiki API).

    Returns
    -------
    The response of ``API(self.lang).get``, unmodified.
    """
    client = API(self.lang)

    query = {
        "format": "json",
        "action": "query",
        "titles": self.title,
        "redirects": "true",
        "prop": "info|revisions",
        "inprop": "url",
        "rvdiffto": "prev",
    }

    if rev_id != "":
        # Pin the request to the single revision the caller asked for.
        query["rvlimit"] = 1
        query["rvstartid"] = rev_id

    return client.get(query)
def fetch_contribs(self):
    """Collect every contribution of the user named ``self.name``.

    The ``usercontribs`` list is requested 500 entries at a time. The
    ``"continue"`` block of each response is merged back into the query
    until a response arrives without one.

    Returns
    -------
    contribs : list
        The ``usercontribs`` entries of all responses, in the order received.
    """
    client = API()

    query = {
        "action": "query",
        "format": "json",
        "list": "usercontribs",
        "ucuser": self.name,
        "uclimit": "500",
        "continue": "",
    }

    collected = []
    has_more = True
    while has_more:
        response = client.get(query)
        collected.extend(response["query"]["usercontribs"])

        has_more = "continue" in response
        if has_more:
            # Feed the continuation tokens into the next request.
            query.update(response["continue"])

    return collected
# Test: sends a query whose "titles" value is a very long, pipe-separated list
# of Tamil page titles through API.post and asserts that the response has a
# "pages" entry under "query".
# NOTE(review): the name suggests this guards against a "request too long"
# failure when such a query is sent with GET -- confirm against the API helper.
# NOTE(review): the title literal is wrapped over several physical lines in
# this file; confirm the wraps are not meant to be part of the string.
def test_request_too_long_success(): api = API() data = {'action': 'query', 'redirects': '', 'titles': u'\u0baa\u0bbf\u0baf\u0bcb\u0ba4\u0bcd\u0ba4\u0bb0\u0bcd \u0b87\u0bb2\u0bc0\u0b9a\u0bcd \u0b9a\u0bbe\u0baf\u0bcd\u0b95\u0bcd\u0b95\u0bcb\u0bb5\u0bcd\u0b9a\u0bc1\u0b95\u0bcd\u0b95\u0bbf|\u0b9a\u0bc1\u0bb1\u0bcd\u0bb1\u0bc1\u0bb2\u0bbe|\u0bb5\u0bcb\u0bb2\u0bcd\u0b95\u0bbe \u0b86\u0bb1\u0bc1|\u0baa\u0bc0\u0bb0\u0b99\u0bcd\u0b95\u0bbf \u0bb5\u0ba3\u0bcd\u0b9f\u0bbf|\u0bb0\u0baf\u0bbe\u0b9a\u0ba9\u0bcd \u0b92\u0baa\u0bcd\u0bb2\u0bbe\u0bb8\u0bcd\u0ba4\u0bc1|\u0b92\u0ba9\u0bcd\u0bb1\u0bc1\u0b95\u0bcd\u0b95\u0bc1 \u0bae\u0bc7\u0bb1\u0bcd\u0baa\u0b9f\u0bcd\u0b9f \u0b95\u0ba3\u0bcd\u0b9f\u0b99\u0bcd\u0b95\u0bb3\u0bbf\u0bb2\u0bcd \u0b85\u0bae\u0bc8\u0ba8\u0bcd\u0ba4\u0bc1\u0bb3\u0bcd\u0bb3 \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0ba8\u0bc6\u0baa\u0bcd\u0baa\u0bcb\u0bb2\u0bbf\u0baf\u0baa\u0bcd \u0baa\u0bcb\u0bb0\u0bcd\u0b95\u0bb3\u0bcd|\u0ba8\u0bc0\u0baa\u0bcd\u0baa\u0bcb \u0b86\u0bb1\u0bc1|\u0baa\u0bc6\u0bb0\u0bc1 \u0bb5\u0bc6\u0b9f\u0bbf\u0baa\u0bcd\u0baa\u0bc1\u0b95\u0bcd \u0b95\u0bcb\u0b9f\u0bcd\u0baa\u0bbe\u0b9f\u0bc1|\u0bae\u0bc8\u0b95\u0bcd\u0b95\u0bb2\u0bcd \u0b83\u0baa\u0bc6\u0bb2\u0bcd\u0baa\u0bcd\u0bb8\u0bcd|\u0baa\u0bbe\u0bb0\u0bcd\u0bae\u0bc1\u0bb2\u0bbe 1|\u0ba4\u0bbf\u0b9f\u0bcd\u0b9f\u0bae\u0bbf\u0b9f\u0bcd\u0b9f \u0baa\u0bca\u0bb0\u0bc1\u0bb3\u0bbe\u0ba4\u0bbe\u0bb0\u0bae\u0bcd|\u0b9c\u0baa\u0bcd\u0baa\u0bbe\u0ba9\u0bcd|\u0b9a\u0bbe\u0bb0\u0bcd\u0baa\u0bbe\u0ba3\u0bcd\u0bae\u0bc8 \u0bae\u0b95\u0bcd\u0b95\u0bb3\u0bbe\u0b9f\u0bcd\u0b9a\u0bbf|\u0baa\u0bc1\u0ba4\u0bbf\u0baf \u0bb5\u0bb3\u0bb0\u0bcd\u0b9a\u0bcd\u0b9a\u0bbf \u0bb5\u0b99\u0bcd\u0b95\u0bbf|\u0b86\u0bb0\u0bcd\u0bae\u0bc0\u0ba9\u0bbf\u0baf\u0bbe|\u0baa\u0bbf\u0bb1\u0bbf\u0bae\u0bcb\u0bb0\u0bcd\u0b9a\u0bc1\u0b95\u0bcd\u0b95\u0bbf \u0ba8\u0bbf\u0bb2\u0baa\u0bcd\u0baa\u0bb0\u0baa\u0bcd\u0baa\u0bc1|\u0b9f\u0bbf\u0bb2\u0bcd\u0bae\u0bbe 
\u0bb0\u0bc2\u0b9a\u0bc6\u0b83\u0baa\u0bcd|\u0b85\u0ba9\u0bc8\u0ba4\u0bcd\u0ba4\u0bc1 \u0b87\u0bb1\u0bc8\u0b95\u0bcd \u0b95\u0bca\u0bb3\u0bcd\u0b95\u0bc8|\u0baa\u0bc6\u0b9f\u0bcd\u0bb0\u0bcb\u0bb2\u0bbf\u0baf \u0b89\u0bb1\u0bcd\u0baa\u0ba4\u0bcd\u0ba4\u0bbf \u0b85\u0b9f\u0bbf\u0baa\u0bcd\u0baa\u0b9f\u0bc8\u0baf\u0bbf\u0bb2\u0bcd \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|2014 \u0baa\u0bbf\u0bb0\u0bbf\u0b95\u0bcd\u0bb8\u0bcd \u0bae\u0bbe\u0ba8\u0bbe\u0b9f\u0bc1|\u0baa\u0bc6\u0bb2\u0bcd\u0b95\u0bcb\u0bb0\u0ba4\u0bcd \u0b92\u0baa\u0bcd\u0bb2\u0bbe\u0bb8\u0bcd\u0ba4\u0bc1|\u0baa\u0bbf\u0bb2\u0bbf\u0baa\u0bcd\u0baa\u0bc0\u0ba9\u0bcd\u0b9a\u0bc1|\u0baa\u0bc1\u0ba4\u0bc1\u0baa\u0bcd\u0baa\u0bbf\u0b95\u0bcd\u0b95\u0ba4\u0bcd\u0ba4\u0b95\u0bcd\u0b95 \u0ba8\u0bc0\u0bb0\u0bcd\u0bb5\u0bb3\u0bae\u0bcd \u0b95\u0bca\u0ba3\u0bcd\u0b9f \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0b85\u0ba3\u0bc1 \u0b86\u0baf\u0bc1\u0ba4 \u0b9a\u0b95\u0bcd\u0ba4\u0bbf\u0baf\u0bc1\u0b9f\u0bc8\u0baf \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bcd|\u0bb2\u0bbf\u0baf\u0bcb \u0b9f\u0bbe\u0bb2\u0bcd\u0bb8\u0bcd\u0b9f\u0bbe\u0baf\u0bcd|\u0b85\u0bae\u0bc6\u0bb0\u0bbf\u0b95\u0bcd\u0b95\u0b95\u0bcd \u0b95\u0bbe\u0b99\u0bcd\u0b95\u0bbf\u0bb0\u0b9a\u0bc1 \u0ba8\u0bc2\u0bb2\u0b95\u0bae\u0bcd|\u0b86\u0bb0\u0bcd\u0b95\u0bcd\u0b95\u0bbe\u0b99\u0bcd\u0b95\u0bc6\u0bb2\u0bcd\u0b9a\u0bbf\u0b95\u0bcd \u0b93\u0baa\u0bb2\u0bbe\u0b9a\u0bc1\u0ba4\u0bcd\u0ba4\u0bc1|\u0bae\u0ba9\u0bbf\u0ba4 \u0bb5\u0bb3\u0bb0\u0bcd\u0b9a\u0bcd\u0b9a\u0bbf\u0b9a\u0bcd \u0b9a\u0bc1\u0b9f\u0bcd\u0b9f\u0bc6\u0ba3\u0bcd|\u0bb2\u0bbf\u0ba4\u0bcd\u0ba4\u0bc1\u0bb5\u0bc7\u0ba9\u0bbf\u0baf\u0bbe|\u0baf\u0bc7\u0bb0\u0bcd\u0b9a\u0bbf|\u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0bb5\u0bc6\u0bb3\u0bbf\u0b95\u0bcd\u0b95\u0b9f\u0ba9\u0bcd 
\u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0bb5\u0bbe\u0b9a\u0bbf\u0b99\u0bcd\u0b9f\u0ba9\u0bcd, \u0b9f\u0bbf. \u0b9a\u0bbf.|\u0baa\u0bc6\u0bb0\u0bc1|\u0bae\u0bb1\u0bc1\u0bae\u0bb2\u0bb0\u0bcd\u0b9a\u0bcd\u0b9a\u0bbf (\u0b90\u0bb0\u0bcb\u0baa\u0bcd\u0baa\u0bbe)|\u0b95\u0bbf\u0baf\u0bc2\u0baa\u0bbe \u0b8f\u0bb5\u0bc1\u0b95\u0ba3\u0bc8 \u0ba8\u0bc6\u0bb0\u0bc1\u0b95\u0bcd\u0b95\u0b9f\u0bbf|\u0bae\u0bbe\u0bb8\u0bcd\u0b95\u0bcb \u0b85\u0bb0\u0b9a\u0bc1\u0baa\u0bcd \u0baa\u0bb2\u0bcd\u0b95\u0bb2\u0bc8\u0b95\u0bcd\u0b95\u0bb4\u0b95\u0bae\u0bcd|\u0ba4\u0bae\u0ba4\u0bc7\u0ba4\u0bb5\u0bcb \u0bb5\u0bbe\u0ba9\u0bc2\u0bb0\u0bcd\u0ba4\u0bbf \u0ba8\u0bbf\u0bb2\u0bc8\u0baf\u0bae\u0bcd|\u0bae\u0ba9\u0bbf\u0ba4 \u0bb5\u0bb3\u0bb0\u0bcd\u0b9a\u0bcd\u0b9a\u0bbf \u0b9a\u0bc1\u0b9f\u0bcd\u0b9f\u0bc6\u0ba3\u0bcd\u0ba3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bbf \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0b9a\u0bc8\u0baa\u0bcd\u0baa\u0bbf\u0bb0\u0b9a\u0bc1|\u0bb2\u0bc6\u0ba9\u0bbf\u0ba9\u0bcd\u0b95\u0bbf\u0bb0\u0bbe\u0b9f\u0bcd \u0bae\u0bc1\u0bb1\u0bcd\u0bb1\u0bc1\u0b95\u0bc8|\u0baa\u0bbf\u0bb0\u0bc7\u0b9a\u0bbf\u0bb2\u0bcd|\u0baa\u0ba9\u0bcd\u0ba9\u0bbe\u0b9f\u0bcd\u0b9f\u0bc1 \u0bae\u0ba9\u0bcd\u0ba9\u0bbf\u0baa\u0bcd\u0baa\u0bc1 \u0b85\u0bb5\u0bc8|\u0b86\u0bb3\u0bcd\u0bb5\u0bc0\u0ba4 \u0bae\u0bca\u0ba4\u0bcd\u0ba4 \u0ba4\u0bc7\u0b9a\u0bbf\u0baf \u0b89\u0bb1\u0bcd\u0baa\u0ba4\u0bcd\u0ba4\u0bbf \u0b85\u0b9f\u0bbf\u0baa\u0bcd\u0baa\u0b9f\u0bc8\u0baf\u0bbf\u0bb2\u0bcd \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0bb2\u0bbf\u0baf\u0bcb\u0ba9\u0bbf\u0b9f\u0bcd \u0baa\u0bbf\u0bb0\u0bc6\u0bb7\u0bcd\u0ba9\u0bc6\u0bb5\u0bcd|\u0bae\u0bca\u0ba4\u0bcd\u0ba4 \u0ba4\u0bc7\u0b9a\u0bbf\u0baf \u0b89\u0bb1\u0bcd\u0baa\u0ba4\u0bcd\u0ba4\u0bbf \u0b85\u0b9f\u0bbf\u0baa\u0bcd\u0baa\u0b9f\u0bc8\u0baf\u0bbf\u0bb2\u0bcd \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bcd 
\u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0b9a\u0bae\u0bcd\u0baa\u0bcb|\u0b90\u0b95\u0bcd\u0b95\u0bbf\u0baf \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bcd \u0bae\u0ba9\u0bbf\u0ba4 \u0b89\u0bb0\u0bbf\u0bae\u0bc8\u0b95\u0bb3\u0bcd \u0b86\u0ba3\u0bc8\u0baf\u0bae\u0bcd|\u0b9c\u0bbf8', 'format': 'json'} r = api.post(data) assert "pages" in r["query"]
def get_links(self, extra_params=None):
    """Retrieve the links contained in the page, according to the API.

    The query is repeated, merging the ``"continue"`` block of each
    response into the parameters, until a response carries no such block.

    Parameters
    ----------
    extra_params : dict, optional
        Parameters merged over the default query. By default only links
        from namespace 0 (usual pages) are requested (``plnamespace`` is 0),
        so templates and the like are skipped. `Other namespaces
        <http://en.wikipedia.org/wiki/Wikipedia:Namespace>`_ can be
        requested by overriding that parameter:

        >>> p.get_links({"plnamespace": 12})  # doctest: +SKIP

    Returns
    -------
    links : list
        The ``"links"`` entries of every response for ``self.page_id``.
        Responses whose page entry has no ``"links"`` key add nothing.
    """
    api = API(self.lang)

    params = {
        "format": "json",
        "action": "query",
        "titles": self.title,
        "prop": "links",
        "pllimit": 500,
        "plnamespace": 0,
        "continue": "",
    }
    # None sentinel instead of a shared mutable ``{}`` default.
    if extra_params is not None:
        params.update(extra_params)

    links = []
    while True:
        r = api.get(params)

        page = r["query"]["pages"][self.page_id]
        links.extend(page.get("links", []))

        if "continue" not in r:
            break
        # Carry the continuation tokens into the next request.
        params.update(r["continue"])

    return links
def get_revisions(self, extra_params=None):
    """Retrieve all revisions of the page, content included.

    The default ``rvprop`` requests
    ``user|userid|timestamp|size|ids|sha1|comment|content`` with
    ``rvlimit`` set to ``"max"``. The query is repeated, merging the
    ``"continue"`` block of each response into the parameters, until a
    response carries no such block.

    Parameters
    ----------
    extra_params : dict, optional
        Parameters merged over the default query.

    Returns
    -------
    revisions : list
        The ``"revisions"`` entries of the first page of every response.
        A page entry without a ``"revisions"`` key adds nothing, so the
        result may be an empty list.

    todo: document revisions@get_revisions
    """
    # NOTE(review): API() is built without a language argument, so the query
    # presumably targets the helper's default wiki rather than ``self.lang``
    # -- confirm this is intended.
    api = API()

    params = {
        "format": "json",
        "action": "query",
        "titles": self.title,
        "prop": "revisions",
        "rvprop": "user|userid|timestamp|size|ids|sha1|comment|content",
        "rvlimit": "max",
        "redirects": "",
        "continue": "",
    }
    # None sentinel instead of a shared mutable ``{}`` default.
    if extra_params is not None:
        params.update(extra_params)

    revisions = []
    while True:
        r = api.get(params)

        pages = r["query"]["pages"]
        # Only one title is requested: take the first (only) page entry.
        page = pages[list(pages.keys())[0]]

        # Guard the key so a page entry without revisions does not raise.
        revisions.extend(page.get("revisions", []))

        if "continue" not in r:
            break
        params.update(r["continue"])

    return revisions
def get_categories(self, extra_params=None):
    """Retrieve the list of all categories used on the page.

    The query is repeated, merging the ``"continue"`` block of each
    response into the parameters, until a response carries no such block.

    Parameters
    ----------
    extra_params : dict, optional
        Parameters merged over the default query. See
        http://www.mediawiki.org/wiki/API:Property/Categories

    Returns
    -------
    categories : list
        The ``"categories"`` entries of every response for
        ``self.page_id``. Responses whose page entry has no
        ``"categories"`` key add nothing.
    """
    api = API(self.lang)

    params = {
        "format": "json",
        "action": "query",
        "titles": self.title,
        "prop": "categories",
        "cllimit": 500,
        "continue": "",
    }
    # The documented ``extra_params`` used to be ignored; apply it to the
    # query. A None sentinel replaces the shared mutable ``{}`` default.
    if extra_params is not None:
        params.update(extra_params)

    categories = []
    while True:
        r = api.get(params)

        page = r["query"]["pages"][self.page_id]
        categories.extend(page.get("categories", []))

        if "continue" not in r:
            break
        # Carry the continuation tokens into the next request.
        params.update(r["continue"])

    return categories
def get_revisions_list(self, extra_params=None):
    """Retrieve all the revisions of the page and their info, without content.

    The default ``rvprop`` requests
    ``user|userid|timestamp|size|ids|sha1|comment`` with ``rvlimit`` set to
    ``"max"``. The query is repeated, merging the ``"continue"`` block of
    each response into the parameters, until a response carries no such
    block.

    Parameters
    ----------
    extra_params : dict, optional
        Parameters merged over the default query.

    Returns
    -------
    revisions : list
        The ``"revisions"`` entries of the first page of every response.
        A page entry without a ``"revisions"`` key adds nothing, so the
        result may be an empty list.
    """
    # NOTE(review): API() is built without a language argument, so the query
    # presumably targets the helper's default wiki rather than ``self.lang``
    # -- confirm this is intended.
    api = API()

    params = {
        "format": "json",
        "action": "query",
        "titles": self.title,
        "prop": "revisions",
        "rvprop": "user|userid|timestamp|size|ids|sha1|comment",
        "rvlimit": "max",
        "redirects": "",
        "continue": "",
    }
    # ``extra_params`` used to be accepted but never used; apply it to the
    # query. A None sentinel replaces the shared mutable ``{}`` default.
    if extra_params is not None:
        params.update(extra_params)

    revisions = []
    while True:
        r = api.get(params)

        pages = r["query"]["pages"]
        # Only one title is requested: take the first (only) page entry.
        page = pages[list(pages.keys())[0]]

        # Guard the key so a page entry without revisions does not raise.
        revisions.extend(page.get("revisions", []))

        if "continue" not in r:
            break
        params.update(r["continue"])

    return revisions