Exemple #1
0
  def get_langlinks(self):
    """
    Fetch the inter-language links recorded for the current page.

    A single ``prop=langlinks`` query (limit 500) is issued for ``self.title``
    through an ``API`` client built from ``self.lang``. Continuation blocks in
    the response are not followed.

    Returns
    -------
    langlinks : list
      The entries stored under the "langlinks" key of the first page of the
      response, or an empty list when the page has no such key.
      todo: put a link to a page with the list of languages
    """
    client = API(self.lang)

    query = {
      "format": "json",
      "action": "query",
      "titles": self.title,
      "prop": "langlinks",
      "lllimit": 500
    }

    response = client.get(query)

    # Only one title is requested, so only the first page entry is inspected.
    pages = response["query"]["pages"]
    first_page = pages[ list(pages.keys())[0] ]

    found = first_page["langlinks"] if "langlinks" in first_page else []

    return list(found)
Exemple #2
0
  def fetch_info(self, title, opt_params=None, lang="en"):
    """
    Query the API for a page and store its basic info on this object.

    Sets ``self.page_id``, ``self.title``, ``self.lang`` and ``self.url`` from
    the first page entry of the response, and merges that whole entry into
    ``self.data``.

    Parameters
    ----------
    title : string
      Title of the page to look up.
    opt_params : dict, optional
      Extra query parameters merged over the base query. When omitted,
      ``{ "prop": "info", "inprop": "url" }`` is used.
    lang : string, optional
      Passed to ``API`` and stored as ``self.lang``. Defaults to "en".

    Returns
    -------
    r : the response object as returned by ``api.get``

    Raises
    ------
    KeyError
      If the response has no "query"/"pages" entry, or the page entry lacks
      "title" or "fullurl".
    IndexError
      If the "pages" mapping of the response is empty.
    """
    # A None sentinel replaces the former dict default so that no mutable
    # object is shared between calls.
    if opt_params is None:
      opt_params = { "prop": "info", "inprop": "url" }

    api = API(lang)

    params = {
      "format": "json",
      "action": "query",
      "titles": u""+title
    }

    params.update(opt_params)

    r = api.get(params)

    pages = r["query"]["pages"]

    # The pages mapping is keyed by page id; a single title was requested so
    # the first key is used.
    self.page_id = list(pages.keys())[0]
    page = pages[ self.page_id ]

    self.title = page["title"]
    self.lang = lang
    # NOTE(review): "fullurl" is presumably only returned because of
    # inprop=url -- confirm before passing opt_params that drop it.
    self.url = page["fullurl"]

    self.data.update(page)

    return r
Exemple #3
0
  def get_diff_full(self, rev_id=""):
    """
    Return the complete API response for a diff request on this page.

    The query asks for ``info|revisions`` on ``self.title`` with
    ``rvdiffto=prev``, i.e. a diff against the previous revision.

    Parameters
    ----------
    rev_id : string, optional
      When given (anything other than the empty string), the query is limited
      to one revision starting at this id. When left empty, no revision is
      pinned and the request is sent without ``rvlimit``/``rvstartid``.

    Returns
    -------
    The response exactly as returned by ``api.get``.
    """
    client = API(self.lang)

    query = {
      "format": "json",
      "action": "query",
      "titles": self.title,
      "redirects":"true",
      "prop": "info|revisions",
      "inprop": "url",
      "rvdiffto" : "prev"
    }

    # Pin the diff to a specific revision only when one was requested.
    if rev_id != "":
      query["rvlimit"] = 1
      query["rvstartid"] = rev_id

    return client.get(query)
Exemple #4
0
  def fetch_contribs(self):
    """
    Collect every contribution of the user named ``self.name``.

    The ``list=usercontribs`` query is repeated, merging the "continue" block
    of each response into the next request, until a response carries no
    "continue" key.

    Returns
    -------
    contribs : list
      All entries found under ``["query"]["usercontribs"]``, in the order the
      responses were received.
    """
    client = API()

    collected = []

    query = {
      "action":"query",
      "format": "json",
      "list":"usercontribs",
      "ucuser": self.name,
      "uclimit": "500",
      "continue": ""
    }

    response = client.get(query)
    collected.extend(response["query"]["usercontribs"])

    # Keep asking for the next batch for as long as the API hands back a
    # continuation block.
    while "continue" in response:
      query.update(response["continue"])
      response = client.get(query)
      collected.extend(response["query"]["usercontribs"])

    return collected
def test_request_too_long_success():
	"""
	Send a query with a very long, pipe-separated ``titles`` value through
	``api.post`` and check that the response contains ``["query"]["pages"]``.
	"""
	api = API()

	# The titles value is one long string. It is written as adjacent literals
	# because a single-quoted literal cannot span several physical lines.
	# NOTE(review): presumably this value is too long for a GET URL, hence
	# the POST -- confirm against API.get / API.post.
	titles = (
		u'\u0baa\u0bbf\u0baf\u0bcb\u0ba4\u0bcd\u0ba4\u0bb0\u0bcd \u0b87\u0bb2\u0bc0\u0b9a\u0bcd \u0b9a\u0bbe\u0baf\u0bcd\u0b95\u0bcd\u0b95\u0bcb\u0bb5\u0bcd\u0b9a\u0bc1\u0b95\u0bcd\u0b95\u0bbf|\u0b9a\u0bc1\u0bb1\u0bcd\u0bb1\u0bc1\u0bb2\u0bbe|\u0bb5\u0bcb\u0bb2\u0bcd\u0b95\u0bbe \u0b86\u0bb1\u0bc1|\u0baa\u0bc0\u0bb0\u0b99\u0bcd\u0b95\u0bbf \u0bb5\u0ba3\u0bcd\u0b9f\u0bbf|\u0bb0\u0baf\u0bbe\u0b9a\u0ba9\u0bcd \u0b92\u0baa\u0bcd\u0bb2\u0bbe\u0bb8\u0bcd\u0ba4\u0bc1|\u0b92\u0ba9\u0bcd\u0bb1\u0bc1\u0b95\u0bcd\u0b95\u0bc1 \u0bae\u0bc7\u0bb1\u0bcd\u0baa\u0b9f\u0bcd\u0b9f \u0b95\u0ba3\u0bcd\u0b9f\u0b99\u0bcd\u0b95\u0bb3\u0bbf\u0bb2\u0bcd \u0b85\u0bae\u0bc8\u0ba8\u0bcd\u0ba4\u0bc1\u0bb3\u0bcd\u0bb3 \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0ba8\u0bc6\u0baa\u0bcd\u0baa\u0bcb\u0bb2\u0bbf\u0baf\u0baa\u0bcd \u0baa\u0bcb\u0bb0\u0bcd\u0b95\u0bb3\u0bcd|\u0ba8\u0bc0\u0baa\u0bcd\u0baa\u0bcb \u0b86\u0bb1\u0bc1|\u0baa\u0bc6\u0bb0\u0bc1 \u0bb5\u0bc6\u0b9f\u0bbf\u0baa\u0bcd\u0baa\u0bc1\u0b95\u0bcd \u0b95\u0bcb\u0b9f\u0bcd\u0baa\u0bbe\u0b9f\u0bc1|\u0bae\u0bc8\u0b95\u0bcd\u0b95\u0bb2\u0bcd \u0b83\u0baa\u0bc6\u0bb2\u0bcd\u0baa\u0bcd\u0bb8\u0bcd|\u0baa\u0bbe\u0bb0\u0bcd\u0bae\u0bc1\u0bb2\u0bbe 1|\u0ba4\u0bbf\u0b9f\u0bcd\u0b9f\u0bae\u0bbf\u0b9f\u0bcd\u0b9f \u0baa\u0bca\u0bb0\u0bc1\u0bb3\u0bbe\u0ba4\u0bbe\u0bb0\u0bae\u0bcd|\u0b9c\u0baa\u0bcd\u0baa\u0bbe\u0ba9\u0bcd|\u0b9a\u0bbe\u0bb0\u0bcd\u0baa\u0bbe\u0ba3\u0bcd\u0bae\u0bc8 \u0bae\u0b95\u0bcd\u0b95\u0bb3\u0bbe\u0b9f\u0bcd\u0b9a\u0bbf|\u0baa\u0bc1\u0ba4\u0bbf\u0baf \u0bb5\u0bb3\u0bb0\u0bcd\u0b9a\u0bcd\u0b9a\u0bbf \u0bb5\u0b99\u0bcd\u0b95\u0bbf|\u0b86\u0bb0\u0bcd\u0bae\u0bc0\u0ba9\u0bbf\u0baf\u0bbe|\u0baa\u0bbf\u0bb1\u0bbf\u0bae\u0bcb\u0bb0\u0bcd\u0b9a\u0bc1\u0b95\u0bcd\u0b95\u0bbf \u0ba8\u0bbf\u0bb2\u0baa\u0bcd\u0baa\u0bb0\u0baa\u0bcd\u0baa\u0bc1|\u0b9f\u0bbf\u0bb2\u0bcd\u0bae\u0bbe '
		u'\u0bb0\u0bc2\u0b9a\u0bc6\u0b83\u0baa\u0bcd|\u0b85\u0ba9\u0bc8\u0ba4\u0bcd\u0ba4\u0bc1 \u0b87\u0bb1\u0bc8\u0b95\u0bcd \u0b95\u0bca\u0bb3\u0bcd\u0b95\u0bc8|\u0baa\u0bc6\u0b9f\u0bcd\u0bb0\u0bcb\u0bb2\u0bbf\u0baf \u0b89\u0bb1\u0bcd\u0baa\u0ba4\u0bcd\u0ba4\u0bbf \u0b85\u0b9f\u0bbf\u0baa\u0bcd\u0baa\u0b9f\u0bc8\u0baf\u0bbf\u0bb2\u0bcd \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|2014 \u0baa\u0bbf\u0bb0\u0bbf\u0b95\u0bcd\u0bb8\u0bcd \u0bae\u0bbe\u0ba8\u0bbe\u0b9f\u0bc1|\u0baa\u0bc6\u0bb2\u0bcd\u0b95\u0bcb\u0bb0\u0ba4\u0bcd \u0b92\u0baa\u0bcd\u0bb2\u0bbe\u0bb8\u0bcd\u0ba4\u0bc1|\u0baa\u0bbf\u0bb2\u0bbf\u0baa\u0bcd\u0baa\u0bc0\u0ba9\u0bcd\u0b9a\u0bc1|\u0baa\u0bc1\u0ba4\u0bc1\u0baa\u0bcd\u0baa\u0bbf\u0b95\u0bcd\u0b95\u0ba4\u0bcd\u0ba4\u0b95\u0bcd\u0b95 \u0ba8\u0bc0\u0bb0\u0bcd\u0bb5\u0bb3\u0bae\u0bcd \u0b95\u0bca\u0ba3\u0bcd\u0b9f \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0b85\u0ba3\u0bc1 \u0b86\u0baf\u0bc1\u0ba4 \u0b9a\u0b95\u0bcd\u0ba4\u0bbf\u0baf\u0bc1\u0b9f\u0bc8\u0baf \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bcd|\u0bb2\u0bbf\u0baf\u0bcb \u0b9f\u0bbe\u0bb2\u0bcd\u0bb8\u0bcd\u0b9f\u0bbe\u0baf\u0bcd|\u0b85\u0bae\u0bc6\u0bb0\u0bbf\u0b95\u0bcd\u0b95\u0b95\u0bcd \u0b95\u0bbe\u0b99\u0bcd\u0b95\u0bbf\u0bb0\u0b9a\u0bc1 \u0ba8\u0bc2\u0bb2\u0b95\u0bae\u0bcd|\u0b86\u0bb0\u0bcd\u0b95\u0bcd\u0b95\u0bbe\u0b99\u0bcd\u0b95\u0bc6\u0bb2\u0bcd\u0b9a\u0bbf\u0b95\u0bcd \u0b93\u0baa\u0bb2\u0bbe\u0b9a\u0bc1\u0ba4\u0bcd\u0ba4\u0bc1|\u0bae\u0ba9\u0bbf\u0ba4 \u0bb5\u0bb3\u0bb0\u0bcd\u0b9a\u0bcd\u0b9a\u0bbf\u0b9a\u0bcd \u0b9a\u0bc1\u0b9f\u0bcd\u0b9f\u0bc6\u0ba3\u0bcd|\u0bb2\u0bbf\u0ba4\u0bcd\u0ba4\u0bc1\u0bb5\u0bc7\u0ba9\u0bbf\u0baf\u0bbe|\u0baf\u0bc7\u0bb0\u0bcd\u0b9a\u0bbf|\u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0bb5\u0bc6\u0bb3\u0bbf\u0b95\u0bcd\u0b95\u0b9f\u0ba9\u0bcd '
		u'\u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0bb5\u0bbe\u0b9a\u0bbf\u0b99\u0bcd\u0b9f\u0ba9\u0bcd, \u0b9f\u0bbf. \u0b9a\u0bbf.|\u0baa\u0bc6\u0bb0\u0bc1|\u0bae\u0bb1\u0bc1\u0bae\u0bb2\u0bb0\u0bcd\u0b9a\u0bcd\u0b9a\u0bbf (\u0b90\u0bb0\u0bcb\u0baa\u0bcd\u0baa\u0bbe)|\u0b95\u0bbf\u0baf\u0bc2\u0baa\u0bbe \u0b8f\u0bb5\u0bc1\u0b95\u0ba3\u0bc8 \u0ba8\u0bc6\u0bb0\u0bc1\u0b95\u0bcd\u0b95\u0b9f\u0bbf|\u0bae\u0bbe\u0bb8\u0bcd\u0b95\u0bcb \u0b85\u0bb0\u0b9a\u0bc1\u0baa\u0bcd \u0baa\u0bb2\u0bcd\u0b95\u0bb2\u0bc8\u0b95\u0bcd\u0b95\u0bb4\u0b95\u0bae\u0bcd|\u0ba4\u0bae\u0ba4\u0bc7\u0ba4\u0bb5\u0bcb \u0bb5\u0bbe\u0ba9\u0bc2\u0bb0\u0bcd\u0ba4\u0bbf \u0ba8\u0bbf\u0bb2\u0bc8\u0baf\u0bae\u0bcd|\u0bae\u0ba9\u0bbf\u0ba4 \u0bb5\u0bb3\u0bb0\u0bcd\u0b9a\u0bcd\u0b9a\u0bbf \u0b9a\u0bc1\u0b9f\u0bcd\u0b9f\u0bc6\u0ba3\u0bcd\u0ba3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bbf \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bbf\u0ba9\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0b9a\u0bc8\u0baa\u0bcd\u0baa\u0bbf\u0bb0\u0b9a\u0bc1|\u0bb2\u0bc6\u0ba9\u0bbf\u0ba9\u0bcd\u0b95\u0bbf\u0bb0\u0bbe\u0b9f\u0bcd \u0bae\u0bc1\u0bb1\u0bcd\u0bb1\u0bc1\u0b95\u0bc8|\u0baa\u0bbf\u0bb0\u0bc7\u0b9a\u0bbf\u0bb2\u0bcd|\u0baa\u0ba9\u0bcd\u0ba9\u0bbe\u0b9f\u0bcd\u0b9f\u0bc1 \u0bae\u0ba9\u0bcd\u0ba9\u0bbf\u0baa\u0bcd\u0baa\u0bc1 \u0b85\u0bb5\u0bc8|\u0b86\u0bb3\u0bcd\u0bb5\u0bc0\u0ba4 \u0bae\u0bca\u0ba4\u0bcd\u0ba4 \u0ba4\u0bc7\u0b9a\u0bbf\u0baf \u0b89\u0bb1\u0bcd\u0baa\u0ba4\u0bcd\u0ba4\u0bbf \u0b85\u0b9f\u0bbf\u0baa\u0bcd\u0baa\u0b9f\u0bc8\u0baf\u0bbf\u0bb2\u0bcd \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bcd \u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0bb2\u0bbf\u0baf\u0bcb\u0ba9\u0bbf\u0b9f\u0bcd \u0baa\u0bbf\u0bb0\u0bc6\u0bb7\u0bcd\u0ba9\u0bc6\u0bb5\u0bcd|\u0bae\u0bca\u0ba4\u0bcd\u0ba4 \u0ba4\u0bc7\u0b9a\u0bbf\u0baf \u0b89\u0bb1\u0bcd\u0baa\u0ba4\u0bcd\u0ba4\u0bbf \u0b85\u0b9f\u0bbf\u0baa\u0bcd\u0baa\u0b9f\u0bc8\u0baf\u0bbf\u0bb2\u0bcd \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bcd '
		u'\u0baa\u0b9f\u0bcd\u0b9f\u0bbf\u0baf\u0bb2\u0bcd|\u0b9a\u0bae\u0bcd\u0baa\u0bcb|\u0b90\u0b95\u0bcd\u0b95\u0bbf\u0baf \u0ba8\u0bbe\u0b9f\u0bc1\u0b95\u0bb3\u0bcd \u0bae\u0ba9\u0bbf\u0ba4 \u0b89\u0bb0\u0bbf\u0bae\u0bc8\u0b95\u0bb3\u0bcd \u0b86\u0ba3\u0bc8\u0baf\u0bae\u0bcd|\u0b9c\u0bbf8'
	)

	data = {'action': 'query', 'redirects': '', 'titles': titles, 'format': 'json'}

	r = api.post(data)

	assert "pages" in r["query"]
Exemple #6
0
  def get_links(self, extra_params=None):
    """
    Retrieve links contained by a wikipedia page according to the API

    The query is repeated, merging the "continue" block of each response into
    the next request, until a response carries no "continue" key.

    Parameters
    ----------
    extra_params : dict, optional
      By default, the method only retrieves links from the namespace 0
      (usual pages) and skips everything else like templates, etc.

      You can still get `the other namespaces
      <http://en.wikipedia.org/wiki/Wikipedia:Namespace>`_ by updating the query
      with extra parameters.

      >>> p.get_links({ "plnamespace": 12 })  # doctest: +SKIP

    Returns
    -------
    links : list
      Every entry found under the "links" key of the page identified by
      ``self.page_id``, accumulated over all responses.
    """
    links = []

    api = API(self.lang)

    params = {
      "format": "json",
      "action": "query",
      "titles": self.title,
      "prop": "links",
      "pllimit": 500,
      "plnamespace": 0,
      "continue":""
    }

    # None is the default instead of a shared mutable dict.
    if extra_params is not None:
      params.update(extra_params)

    while True:
      r = api.get(params)

      page = r["query"]["pages"][ self.page_id ]

      # A response batch may come back without any link for the page.
      if "links" in page:
        links.extend(page["links"])

      if "continue" in r:
        params.update(r["continue"])
      else:
        break

    return links
Exemple #7
0
  def get_revisions(self, extra_params=None):
    """
    Retrieve the revisions of the current page, including their content.

    The query is repeated, merging the "continue" block of each response into
    the next request, until a response carries no "continue" key.

    Parameters
    ----------
    extra_params : dictionary, optional
      Query parameters merged over the defaults (they can override ``rvprop``,
      ``rvlimit``, etc.).

    Returns
    -------
    revisions : list
      Every entry found under the "revisions" key of the first page of each
      response.
      todo: document revisions@get_revisions

    Raises
    ------
    KeyError
      If the page entry of a response has no "revisions" key.
    """
    # NOTE(review): API() is built without self.lang here, unlike
    # get_links/get_categories -- confirm the default language is intended.
    api = API()

    params = {
      "format": "json",
      "action": "query",
      "titles": self.title,
      "prop": "revisions",
      "rvprop": "user|userid|timestamp|size|ids|sha1|comment|content",
      "rvlimit": "max",
      "redirects": "",
      "continue": ""
    }

    # None is the default instead of a shared mutable dict.
    if extra_params is not None:
      params.update(extra_params)

    revisions = []

    while True:
      r = api.get(params)

      # A single title is requested, so only the first page entry is read.
      pages = r["query"]["pages"]
      page = pages[ list(pages.keys())[0] ]

      revisions += page["revisions"]

      if "continue" in r:
        params.update(r["continue"])
      else:
        break

    return revisions
Exemple #8
0
  def get_categories(self, extra_params=None):
    """
    Retrieve a list of all categories used on the current page

    The query is repeated, merging the "continue" block of each response into
    the next request, until a response carries no "continue" key.

    Parameters
    ----------
    extra_params : dict, optional
      Query parameters merged over the defaults.
      - http://www.mediawiki.org/wiki/API:Property/Categories

    Returns
    -------
    categories : list
      Every entry found under the "categories" key of the page identified by
      ``self.page_id``, accumulated over all responses.
    """

    categories = []

    api = API(self.lang)

    params = {
      "format": "json",
      "action": "query",
      "titles": self.title,
      "prop": "categories",
      "cllimit": 500,
      "continue":""
    }

    # extra_params used to be accepted but never applied; merge it so the
    # documented parameter takes effect. None is the default instead of a
    # shared mutable dict.
    if extra_params is not None:
      params.update(extra_params)

    while True:
      r = api.get(params)

      page = r["query"]["pages"][ self.page_id ]

      # A response batch may come back without any category for the page.
      if "categories" in page:
        categories.extend(page["categories"])

      if "continue" in r:
        params.update(r["continue"])
      else:
        break

    return categories
Exemple #9
0
  def get_revisions_list(self, extra_params=None):
    """
    Retrieve all the revisions and their info (without the content)

    The query is repeated, merging the "continue" block of each response into
    the next request, until a response carries no "continue" key.

    Parameters
    ----------
    extra_params : dict, optional
      Query parameters merged over the defaults.

    Return
    ------
    revisions : list
      Every entry found under the "revisions" key of the first page of each
      response.

    Raises
    ------
    KeyError
      If the page entry of a response has no "revisions" key.
    """
    # NOTE(review): API() is built without self.lang here, unlike
    # get_links/get_categories -- confirm the default language is intended.
    api = API()

    revisions = []

    params = {
      "format": "json",
      "action": "query",
      "titles": self.title,
      "prop": "revisions",
      "rvprop": "user|userid|timestamp|size|ids|sha1|comment",
      "rvlimit": "max",
      "redirects": "",
      "continue": ""
    }

    # extra_params used to be accepted but silently ignored; merge it into
    # the query. None is the default instead of a shared mutable dict.
    if extra_params is not None:
      params.update(extra_params)

    while True:
      r = api.get(params)

      # A single title is requested, so only the first page entry is read.
      pages = r["query"]["pages"]
      page = pages[ list(pages.keys())[0] ]

      revisions += page["revisions"]

      if "continue" in r:
        params.update(r["continue"])
      else:
        break

    return revisions