class TestSleepers(unittest.TestCase):
    """Tests for the Sleepers factory and the Sleeper retry helper."""

    def setUp(self):
        # Patch time.sleep so tests run instantly; stopped in tearDown.
        self.sleep = mock.patch('time.sleep').start()
        self.max_retries = 10
        self.sleepers = Sleepers(self.max_retries, 30)

    def tearDown(self):
        mock.patch.stopall()

    def test_make(self):
        """make() returns a fresh Sleeper with a zeroed retry counter."""
        sleeper = self.sleepers.make()
        # isinstance is the idiomatic type check (was: type(...) == Sleeper)
        assert isinstance(sleeper, Sleeper)
        assert sleeper.retries == 0

    def test_sleep(self):
        """First sleep is immediate, subsequent sleeps use the timeout."""
        sleeper = self.sleepers.make()
        sleeper.sleep()
        sleeper.sleep()
        self.sleep.assert_has_calls([mock.call(0), mock.call(30)])

    def test_min_time(self):
        """An explicit minimum wait time overrides the computed delay."""
        sleeper = self.sleepers.make()
        sleeper.sleep(5)
        self.sleep.assert_has_calls([mock.call(5)])

    def test_retries_count(self):
        """Each sleep() call increments the retry counter."""
        sleeper = self.sleepers.make()
        sleeper.sleep()
        sleeper.sleep()
        assert sleeper.retries == 2

    def test_max_retries(self):
        """Exceeding the configured retry budget raises."""
        sleeper = self.sleepers.make()
        for x in range(self.max_retries):
            sleeper.sleep()
        with pytest.raises(MaximumRetriesExceeded):
            sleeper.sleep()
class Site(object):
    """A MediaWiki site identified by its hostname.

        >>> import mwclient
        >>> site = mwclient.Site('en.wikipedia.org')

    Do not include the leading "http://".

    Mwclient assumes that the script path (where index.php and api.php are
    located) is '/w/'. If the site uses a different script path, you must
    specify this (path must end in a '/').

    Examples:

    >>> site = mwclient.Site('vim.wikia.com', path='/')
    >>> site = mwclient.Site('sourceforge.net', path='/apps/mediawiki/mwclient/')
    """
    api_limit = 500

    def __init__(self, host, path='/w/', ext='.php', pool=None,
                 retry_timeout=30, max_retries=25,
                 wait_callback=lambda *x: None, clients_useragent=None,
                 max_lag=3, compress=True, force_login=True, do_init=True,
                 httpauth=None, reqs=None, consumer_token=None,
                 consumer_secret=None, access_token=None, access_secret=None,
                 client_certificate=None, custom_headers=None):
        # Setup member variables
        self.host = host
        self.path = path
        self.ext = ext
        self.credentials = None
        self.compress = compress
        self.max_lag = text_type(max_lag)
        self.force_login = force_login
        self.requests = reqs or {}

        # Pick the authentication mechanism: OAuth beats HTTP auth.
        if consumer_token is not None:
            auth = OAuth1(consumer_token, consumer_secret,
                          access_token, access_secret)
        elif isinstance(httpauth, (list, tuple)):
            auth = HTTPBasicAuth(*httpauth)
        elif httpauth is None or isinstance(httpauth, (AuthBase,)):
            auth = httpauth
        else:
            raise RuntimeError('Authentication is not a tuple or an instance of AuthBase')

        self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback)

        # Site properties
        self.blocked = False    # Whether current user is blocked
        self.hasmsg = False     # Whether current user has new messages
        self.groups = []        # Groups current user belongs to
        self.rights = []        # Rights current user has
        self.tokens = {}        # Edit tokens of the current user
        self.version = None

        self.namespaces = self.default_namespaces
        self.writeapi = False

        # Setup connection
        if pool is None:
            self.connection = requests.Session()
            self.connection.auth = auth
            if client_certificate:
                self.connection.cert = client_certificate
            prefix = '{} - '.format(clients_useragent) if clients_useragent else ''
            self.connection.headers['User-Agent'] = (
                '{prefix}MwClient/{ver} ({url})'.format(
                    prefix=prefix,
                    ver=__ver__,
                    url='https://github.com/mwclient/mwclient'
                )
            )
            if custom_headers:
                self.connection.headers.update(custom_headers)
        else:
            self.connection = pool

        # Page generators
        self.pages = listing.PageList(self)
        self.categories = listing.PageList(self, namespace=14)
        self.images = listing.PageList(self, namespace=6)

        # Compat page generators
        self.Pages = self.pages
        self.Categories = self.categories
        self.Images = self.images

        # Initialization status
        self.initialized = False

        if do_init:
            try:
                self.site_init()
            except errors.APIError as e:
                if e.args[0] == 'mwoauth-invalid-authorization':
                    raise errors.OAuthAuthorizationError(e.code, e.info)
                # Private wiki, do init after login
                if e.args[0] not in {u'unknown_action', u'readapidenied'}:
                    raise

    def site_init(self):
        """Populate site and user info from the API.

        On repeated calls (after login) only the user info is refreshed.
        """
        if self.initialized:
            info = self.get('query', meta='userinfo', uiprop='groups|rights')
            userinfo = info['query']['userinfo']
            self.username = userinfo['name']
            self.groups = userinfo.get('groups', [])
            self.rights = userinfo.get('rights', [])
            self.tokens = {}
            return

        meta = self.get('query', meta='siteinfo|userinfo',
                        siprop='general|namespaces', uiprop='groups|rights',
                        retry_on_error=False)

        # Extract site info
        self.site = meta['query']['general']
        self.namespaces = {
            namespace['id']: namespace.get('*', '')
            for namespace in six.itervalues(meta['query']['namespaces'])
        }
        self.writeapi = 'writeapi' in self.site

        self.version = self.version_tuple_from_generator(self.site['generator'])

        # Require MediaWiki version >= 1.16
        self.require(1, 16)

        # User info
        userinfo = meta['query']['userinfo']
        self.username = userinfo['name']
        self.groups = userinfo.get('groups', [])
        self.rights = userinfo.get('rights', [])
        self.initialized = True

    @staticmethod
    def version_tuple_from_generator(string, prefix='MediaWiki '):
        """Return a version tuple from a MediaWiki Generator string.

        Example: "MediaWiki 1.5.1" → (1, 5, 1)

        Args:
            prefix (str): The expected prefix of the string
        """
        if not string.startswith(prefix):
            raise errors.MediaWikiVersionError('Unknown generator {}'.format(string))

        version = string[len(prefix):].split('.')

        def split_num(s):
            """Split the string on the first non-digit character.

            Returns:
                A tuple of the digit part as int and, if available,
                the rest of the string.
            """
            i = 0
            while i < len(s):
                if s[i] < '0' or s[i] > '9':
                    break
                i += 1
            if s[i:]:
                return (int(s[:i]), s[i:], )
            else:
                return (int(s[:i]), )

        version_tuple = sum((split_num(s) for s in version), ())

        if len(version_tuple) < 2:
            raise errors.MediaWikiVersionError('Unknown MediaWiki {}'
                                               .format('.'.join(version)))

        return version_tuple

    default_namespaces = {
        0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project',
        5: u'Project talk', 6: u'Image', 7: u'Image talk', 8: u'MediaWiki',
        9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk',
        12: u'Help', 13: u'Help talk', 14: u'Category', 15: u'Category talk',
        -1: u'Special', -2: u'Media'
    }

    def __repr__(self):
        return "<Site object '%s%s'>" % (self.host, self.path)

    def get(self, action, *args, **kwargs):
        """Perform a generic API call using GET.

        This is just a shorthand for calling api() with http_method='GET'.
        All arguments will be passed on.

        Returns:
            The raw response from the API call, as a dictionary.
        """
        return self.api(action, 'GET', *args, **kwargs)

    def post(self, action, *args, **kwargs):
        """Perform a generic API call using POST.

        This is just a shorthand for calling api() with http_method='POST'.
        All arguments will be passed on.

        Returns:
            The raw response from the API call, as a dictionary.
        """
        return self.api(action, 'POST', *args, **kwargs)

    def api(self, action, http_method='POST', *args, **kwargs):
        """Perform a generic API call and handle errors.

        All arguments will be passed on.

        Example:
            To get coordinates from the GeoData MediaWiki extension at English Wikipedia:

            >>> site = Site('en.wikipedia.org')
            >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen')
            >>> for page in result['query']['pages'].values():
            ...     if 'coordinates' in page:
            ...         print '{} {} {}'.format(page['title'],
            ...             page['coordinates'][0]['lat'],
            ...             page['coordinates'][0]['lon'])
            Oslo 59.95 10.75
            Copenhagen 55.6761 12.5683

        Returns:
            The raw response from the API call, as a dictionary.
        """
        kwargs.update(args)

        if 'continue' not in kwargs:
            kwargs['continue'] = ''

        # Piggyback userinfo (block status, new-message flag) on queries.
        if action == 'query':
            if 'meta' in kwargs:
                kwargs['meta'] += '|userinfo'
            else:
                kwargs['meta'] = 'userinfo'
            if 'uiprop' in kwargs:
                kwargs['uiprop'] += '|blockinfo|hasmsg'
            else:
                kwargs['uiprop'] = 'blockinfo|hasmsg'

        sleeper = self.sleepers.make()

        while True:
            info = self.raw_api(action, http_method, **kwargs)
            if not info:
                info = {}
            if self.handle_api_result(info, sleeper=sleeper):
                return info

    def handle_api_result(self, info, kwargs=None, sleeper=None):
        """Inspect an API response, updating user state and raising on errors.

        Returns:
            True if the result is final, False if the call should be retried
            (transient database error).

        Raises:
            errors.APIError: on non-transient API errors.
        """
        if sleeper is None:
            sleeper = self.sleepers.make()

        try:
            userinfo = info['query']['userinfo']
        except KeyError:
            userinfo = ()
        if 'blockedby' in userinfo:
            self.blocked = (userinfo['blockedby'],
                            userinfo.get('blockreason', u''))
        else:
            self.blocked = False
        self.hasmsg = 'messages' in userinfo
        self.logged_in = 'anon' not in userinfo

        if 'error' in info:
            # Transient DB errors are retried after a sleep.
            if info['error']['code'] in {u'internal_api_error_DBConnectionError',
                                         u'internal_api_error_DBQueryError'}:
                sleeper.sleep()
                return False
            if '*' in info['error']:
                raise errors.APIError(info['error']['code'],
                                      info['error']['info'],
                                      info['error']['*'])
            raise errors.APIError(info['error']['code'],
                                  info['error']['info'], kwargs)
        return True

    @staticmethod
    def _query_string(*args, **kwargs):
        """Build the request parameters, placing tokens last.

        MediaWiki requires the edit token to be sent after the data it
        protects, hence the ordering.
        """
        kwargs.update(args)

        qs1 = [(k, v) for k, v in six.iteritems(kwargs)
               if k not in {'wpEditToken', 'token'}]
        qs2 = [(k, v) for k, v in six.iteritems(kwargs)
               if k in {'wpEditToken', 'token'}]

        return OrderedDict(qs1 + qs2)

    def raw_call(self, script, data, files=None, retry_on_error=True,
                 http_method='POST'):
        """Perform a generic request and return the raw text.

        In the event of a network problem, or a HTTP response with status code
        5XX, we'll wait and retry the configured number of times before giving
        up if `retry_on_error` is True.

        `requests.exceptions.HTTPError` is still raised directly for
        HTTP responses with status codes in the 4XX range, and invalid
        HTTP responses.

        Args:
            script (str): Script name, usually 'api'.
            data (dict): Post data
            files (dict): Files to upload
            retry_on_error (bool): Retry on connection error

        Returns:
            The raw text response.
        """
        headers = {}
        if self.compress and gzip:
            headers['Accept-Encoding'] = 'gzip'
        sleeper = self.sleepers.make((script, data))

        scheme = 'https'
        host = self.host
        if isinstance(host, (list, tuple)):
            # Legacy (scheme, host) pair support.
            scheme, host = host

        url = '{scheme}://{host}{path}{script}{ext}'.format(scheme=scheme,
                                                            host=host,
                                                            path=self.path,
                                                            script=script,
                                                            ext=self.ext)

        while True:
            try:
                if http_method == 'GET':
                    stream = self.connection.get(url, params=data, files=files,
                                                 headers=headers,
                                                 **self.requests)
                else:
                    stream = self.connection.post(url, data=data, files=files,
                                                  headers=headers,
                                                  **self.requests)
                if stream.headers.get('x-database-lag'):
                    wait_time = int(stream.headers.get('retry-after'))
                    log.warning('Database lag exceeds max lag. '
                                'Waiting for {} seconds'.format(wait_time))
                    sleeper.sleep(wait_time)
                elif stream.status_code == 200:
                    return stream.text
                elif stream.status_code < 500 or stream.status_code > 599:
                    # 4XX and invalid responses raise immediately.
                    stream.raise_for_status()
                else:
                    if not retry_on_error:
                        stream.raise_for_status()
                    log.warning('Received {status} response: {text}. '
                                'Retrying in a moment.'
                                .format(status=stream.status_code,
                                        text=stream.text))
                    sleeper.sleep()
            except requests.exceptions.ConnectionError:
                # In the event of a network problem
                # (e.g. DNS failure, refused connection, etc),
                # Requests will raise a ConnectionError exception.
                if not retry_on_error:
                    raise
                log.warning('Connection error. Retrying in a moment.')
                sleeper.sleep()

    def raw_api(self, action, http_method='POST', *args, **kwargs):
        """Send a call to the API.

        Returns:
            The parsed JSON response.

        Raises:
            errors.APIDisabledError: if the remote API is disabled.
            errors.InvalidResponse: if the response is not valid JSON.
        """
        # Idiomatic pop-with-default instead of try/except KeyError.
        retry_on_error = kwargs.pop('retry_on_error', True)
        kwargs['action'] = action
        kwargs['format'] = 'json'
        data = self._query_string(*args, **kwargs)
        res = self.raw_call('api', data, retry_on_error=retry_on_error,
                            http_method=http_method)

        try:
            return json.loads(res)
        except ValueError:
            if res.startswith('MediaWiki API is not enabled for this site.'):
                raise errors.APIDisabledError
            raise errors.InvalidResponse(res)

    def raw_index(self, action, http_method='POST', *args, **kwargs):
        """Sends a call to index.php rather than the API."""
        kwargs['action'] = action
        kwargs['maxlag'] = self.max_lag
        data = self._query_string(*args, **kwargs)
        return self.raw_call('index', data, http_method=http_method)

    def require(self, major, minor, revision=None, raise_error=True):
        """Check that the site's MediaWiki version is at least major.minor.

        Returns:
            True/False when raise_error is falsy; otherwise raises on failure.

        Raises:
            RuntimeError: if the site has not been initialized yet.
            errors.MediaWikiVersionError: if the version requirement fails.
        """
        if self.version is None:
            if raise_error is None:
                return
            raise RuntimeError('Site %s has not yet been initialized' % repr(self))

        if revision is None:
            if self.version[:2] >= (major, minor):
                return True
            elif raise_error:
                raise errors.MediaWikiVersionError(
                    'Requires version {required[0]}.{required[1]}, '
                    'current version is {current[0]}.{current[1]}'
                    .format(required=(major, minor),
                            current=(self.version[:2]))
                )
            else:
                return False
        else:
            raise NotImplementedError

    # Actions
    def email(self, user, text, subject, cc=False):
        """Send email to a specified user on the wiki.

            >>> try:
            ...     site.email('SomeUser', 'Some message', 'Some subject')
            ... except mwclient.errors.NoSpecifiedEmailError as e:
            ...     print 'The user does not accept email, or has not specified an email address.'

        Args:
            user (str): User name of the recipient
            text (str): Body of the email
            subject (str): Subject of the email
            cc (bool): True to send a copy of the email to yourself
                (default is False)

        Returns:
            Dictionary of the JSON response

        Raises:
            NoSpecifiedEmailError (mwclient.errors.NoSpecifiedEmailError):
                if recipient does not accept email
            EmailError (mwclient.errors.EmailError): on other errors
        """
        token = self.get_token('email')

        try:
            info = self.post('emailuser', target=user, subject=subject,
                             text=text, ccme=cc, token=token)
        except errors.APIError as e:
            if e.args[0] == u'noemail':
                raise errors.NoSpecifiedEmail(user, e.args[1])
            # Exceptions are not iterable on Python 3; unpack args explicitly
            # (was: errors.EmailError(*e)).
            raise errors.EmailError(*e.args)

        return info

    def login(self, username=None, password=None, cookies=None, domain=None):
        """Login to the wiki."""
        if username and password:
            self.credentials = (username, password, domain)
        if cookies:
            self.connection.cookies.update(cookies)

        if self.credentials:
            sleeper = self.sleepers.make()
            kwargs = {
                'lgname': self.credentials[0],
                'lgpassword': self.credentials[1]
            }
            if self.credentials[2]:
                kwargs['lgdomain'] = self.credentials[2]

            while True:
                login = self.post('login', **kwargs)
                if login['login']['result'] == 'Success':
                    break
                elif login['login']['result'] == 'NeedToken':
                    kwargs['lgtoken'] = login['login']['token']
                elif login['login']['result'] == 'Throttled':
                    sleeper.sleep(int(login['login'].get('wait', 5)))
                else:
                    raise errors.LoginError(self, login['login'])

        self.site_init()

    def get_token(self, type, force=False, title=None):
        """Fetch (and cache) an edit/action token of the given type."""
        if self.version[:2] >= (1, 24):
            # The 'csrf' (cross-site request forgery) token introduced in 1.24
            # replaces the majority of older tokens, like edittoken and
            # movetoken.
            if type not in {'watch', 'patrol', 'rollback', 'userrights'}:
                type = 'csrf'

        if type not in self.tokens:
            self.tokens[type] = '0'

        if self.tokens.get(type, '0') == '0' or force:
            if self.version[:2] >= (1, 24):
                info = self.post('query', meta='tokens', type=type)
                self.tokens[type] = info['query']['tokens']['%stoken' % type]
            else:
                if title is None:
                    # Some dummy title was needed to get a token prior to 1.24
                    title = 'Test'
                info = self.post('query', titles=title,
                                 prop='info', intoken=type)
                for i in six.itervalues(info['query']['pages']):
                    if i['title'] == title:
                        self.tokens[type] = i['%stoken' % type]

        return self.tokens[type]

    def upload(self, file=None, filename=None, description='', ignore=False,
               file_size=None, url=None, filekey=None, comment=None):
        """Upload a file to the site.

        Note that one of `file`, `filekey` and `url` must be specified, but
        not more than one. For normal uploads, you specify `file`.

        Args:
            file (str): File object or stream to upload.
            filename (str): Destination filename, don't include namespace
                prefix like 'File:'
            description (str): Wikitext for the file description page.
            ignore (bool): True to upload despite any warnings.
            file_size (int): Deprecated in mwclient 0.7
            url (str): URL to fetch the file from.
            filekey (str): Key that identifies a previous upload that was
                stashed temporarily.
            comment (str): Upload comment. Also used as the initial page text
                for new files if `description` is not specified.

        Example:

            >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg',
                              description='Some description')

        Returns:
            JSON result from the API.

        Raises:
            errors.InsufficientPermission
            requests.exceptions.HTTPError
        """
        if file_size is not None:
            # Note that DeprecationWarning is hidden by default since Python 2.7
            warnings.warn(
                'file_size is deprecated since mwclient 0.7',
                DeprecationWarning
            )

        if filename is None:
            raise TypeError('filename must be specified')

        if len([x for x in [file, filekey, url] if x is not None]) != 1:
            raise TypeError("exactly one of 'file', 'filekey' and 'url' must be specified")

        image = self.Images[filename]
        if not image.can('upload'):
            raise errors.InsufficientPermission(filename)

        predata = {}

        if comment is None:
            predata['comment'] = description
        else:
            predata['comment'] = comment
            predata['text'] = description

        if ignore:
            predata['ignorewarnings'] = 'true'
        predata['token'] = image.get_token('edit')
        predata['action'] = 'upload'
        predata['format'] = 'json'
        predata['filename'] = filename
        if url:
            predata['url'] = url

        # sessionkey was renamed to filekey in MediaWiki 1.18
        # https://phabricator.wikimedia.org/rMW5f13517e36b45342f228f3de4298bb0fe186995d
        if self.version[:2] < (1, 18):
            predata['sessionkey'] = filekey
        else:
            predata['filekey'] = filekey

        postdata = predata
        files = None
        if file is not None:
            # Workaround for https://github.com/mwclient/mwclient/issues/65
            # ----------------------------------------------------------------
            # Since the filename in Content-Disposition is not interpreted,
            # we can send some ascii-only dummy name rather than the real
            # filename, which might contain non-ascii.
            file = ('fake-filename', file)
            # End of workaround
            # ----------------------------------------------------------------
            files = {'file': file}

        sleeper = self.sleepers.make()
        while True:
            data = self.raw_call('api', postdata, files)
            info = json.loads(data)
            if not info:
                info = {}
            if self.handle_api_result(info, kwargs=predata, sleeper=sleeper):
                return info.get('upload', {})

    def parse(self, text=None, title=None, page=None, prop=None,
              redirects=False, mobileformat=False):
        """Parse wikitext (or an existing page) via the API."""
        kwargs = {}
        if text is not None:
            kwargs['text'] = text
        if title is not None:
            kwargs['title'] = title
        if page is not None:
            kwargs['page'] = page
        if prop is not None:
            kwargs['prop'] = prop
        if redirects:
            kwargs['redirects'] = '1'
        if mobileformat:
            kwargs['mobileformat'] = '1'
        result = self.get('parse', **kwargs)
        return result['parse']

    # def block(self): TODO?
    # def unblock: TODO?
    # def patrol: TODO?
    # def import: TODO?

    # Lists
    def allpages(self, start=None, prefix=None, namespace='0',
                 filterredir='all', minsize=None, maxsize=None, prtype=None,
                 prlevel=None, limit=None, dir='ascending',
                 filterlanglinks='all', generator=True, end=None):
        """Retrieve all pages on the wiki as a generator."""
        pfx = listing.List.get_prefix('ap', generator)
        kwargs = dict(listing.List.generate_kwargs(
            pfx, ('from', start), ('to', end), prefix=prefix,
            minsize=minsize, maxsize=maxsize, prtype=prtype, prlevel=prlevel,
            namespace=namespace, filterredir=filterredir, dir=dir,
            filterlanglinks=filterlanglinks,
        ))
        return listing.List.get_list(generator)(self, 'allpages', 'ap',
                                                limit=limit,
                                                return_values='title',
                                                **kwargs)

    def allimages(self, start=None, prefix=None, minsize=None, maxsize=None,
                  limit=None, dir='ascending', sha1=None, sha1base36=None,
                  generator=True, end=None):
        """Retrieve all images on the wiki as a generator."""
        pfx = listing.List.get_prefix('ai', generator)
        kwargs = dict(listing.List.generate_kwargs(
            pfx, ('from', start), ('to', end), prefix=prefix,
            minsize=minsize, maxsize=maxsize, dir=dir, sha1=sha1,
            sha1base36=sha1base36,
        ))
        return listing.List.get_list(generator)(self, 'allimages', 'ai',
                                                limit=limit,
                                                return_values='timestamp|url',
                                                **kwargs)

    def alllinks(self, start=None, prefix=None, unique=False, prop='title',
                 namespace='0', limit=None, generator=True, end=None):
        """Retrieve a list of all links on the wiki as a generator."""
        pfx = listing.List.get_prefix('al', generator)
        kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start),
                                                   ('to', end), prefix=prefix,
                                                   prop=prop,
                                                   namespace=namespace))
        if unique:
            kwargs[pfx + 'unique'] = '1'
        return listing.List.get_list(generator)(self, 'alllinks', 'al',
                                                limit=limit,
                                                return_values='title',
                                                **kwargs)

    def allcategories(self, start=None, prefix=None, dir='ascending',
                      limit=None, generator=True, end=None):
        """Retrieve all categories on the wiki as a generator."""
        pfx = listing.List.get_prefix('ac', generator)
        kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start),
                                                   ('to', end), prefix=prefix,
                                                   dir=dir))
        return listing.List.get_list(generator)(self, 'allcategories', 'ac',
                                                limit=limit, **kwargs)

    def allusers(self, start=None, prefix=None, group=None, prop=None,
                 limit=None, witheditsonly=False, activeusers=False,
                 rights=None, end=None):
        """Retrieve all users on the wiki as a generator."""
        kwargs = dict(listing.List.generate_kwargs('au', ('from', start),
                                                   ('to', end), prefix=prefix,
                                                   group=group, prop=prop,
                                                   rights=rights,
                                                   witheditsonly=witheditsonly,
                                                   activeusers=activeusers))
        return listing.List(self, 'allusers', 'au', limit=limit, **kwargs)

    def blocks(self, start=None, end=None, dir='older', ids=None, users=None,
               limit=None, prop='id|user|by|timestamp|expiry|reason|flags'):
        """Retrieve blocks as a generator.

        Each block is a dictionary containing:

        - user: the username or IP address of the user
        - id: the ID of the block
        - timestamp: when the block was added
        - expiry: when the block runs out (infinity for indefinite blocks)
        - reason: the reason they are blocked
        - allowusertalk: key is present (empty string) if the user is allowed
          to edit their user talk page
        - by: the administrator who blocked the user
        - nocreate: key is present (empty string) if the user's ability to
          create accounts has been disabled.
        """
        # TODO: Fix. Fix what?
        kwargs = dict(listing.List.generate_kwargs('bk', start=start, end=end,
                                                   dir=dir, ids=ids,
                                                   users=users, prop=prop))
        return listing.List(self, 'blocks', 'bk', limit=limit, **kwargs)

    def deletedrevisions(self, start=None, end=None, dir='older',
                         namespace=None, limit=None, prop='user|comment'):
        # TODO: Fix
        kwargs = dict(listing.List.generate_kwargs('dr', start=start, end=end,
                                                   dir=dir,
                                                   namespace=namespace,
                                                   prop=prop))
        return listing.List(self, 'deletedrevs', 'dr', limit=limit, **kwargs)

    def exturlusage(self, query, prop=None, protocol='http', namespace=None,
                    limit=None):
        r"""Retrieve the list of pages that link to a particular domain or URL,
        as a generator.

        This API call mirrors the Special:LinkSearch function on-wiki.

        Query can be a domain like 'bbc.co.uk'.
        Wildcards can be used, e.g. '\*.bbc.co.uk'.
        Alternatively, a query can contain a full domain name and some or all
        of a URL: e.g. '\*.wikipedia.org/wiki/\*'

        See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details.

        The generator returns dictionaries containing three keys:

        - url: the URL linked to.
        - ns: namespace of the wiki page
        - pageid: the ID of the wiki page
        - title: the page title.
        """
        kwargs = dict(listing.List.generate_kwargs('eu', query=query,
                                                   prop=prop,
                                                   protocol=protocol,
                                                   namespace=namespace))
        return listing.List(self, 'exturlusage', 'eu', limit=limit, **kwargs)

    def logevents(self, type=None, prop=None, start=None, end=None,
                  dir='older', user=None, title=None, limit=None, action=None):
        """Retrieve logevents as a generator."""
        kwargs = dict(listing.List.generate_kwargs('le', prop=prop, type=type,
                                                   start=start, end=end,
                                                   dir=dir, user=user,
                                                   title=title, action=action))
        return listing.List(self, 'logevents', 'le', limit=limit, **kwargs)

    def checkuserlog(self, user=None, target=None, limit=10, dir='older',
                     start=None, end=None):
        """Retrieve checkuserlog items as a generator."""
        kwargs = dict(listing.List.generate_kwargs('cul', target=target,
                                                   start=start, end=end,
                                                   dir=dir, user=user))
        return listing.NestedList('entries', self, 'checkuserlog', 'cul',
                                  limit=limit, **kwargs)

    # def protectedtitles requires 1.15

    def random(self, namespace, limit=20):
        """Retrieve a generator of random pages from a particular namespace.

        limit specifies the number of random articles retrieved.
        namespace is a namespace identifier integer.

        Generator contains dictionary with namespace, page ID and title.
        """
        kwargs = dict(listing.List.generate_kwargs('rn', namespace=namespace))
        return listing.List(self, 'random', 'rn', limit=limit, **kwargs)

    def recentchanges(self, start=None, end=None, dir='older', namespace=None,
                      prop=None, show=None, limit=None, type=None,
                      toponly=None):
        """List recent changes to the wiki, à la Special:Recentchanges.
        """
        kwargs = dict(listing.List.generate_kwargs('rc', start=start, end=end,
                                                   dir=dir,
                                                   namespace=namespace,
                                                   prop=prop, show=show,
                                                   type=type,
                                                   toponly='1' if toponly else None))
        return listing.List(self, 'recentchanges', 'rc', limit=limit, **kwargs)

    def revisions(self, revids, prop='ids|timestamp|flags|comment|user',
                  expandtemplates=False, diffto='prev'):
        """Get data about a list of revisions.

        See also the `Page.revisions()` method.

        API doc: https://www.mediawiki.org/wiki/API:Revisions

        Example: Get revision text for two revisions:

            >>> for revision in site.revisions([689697696, 689816909], prop='content'):
            ...     print revision['*']

        Args:
            revids (list): A list of (max 50) revisions.
            prop (str): Which properties to get for each revision.
            expandtemplates (bool): Expand templates in `rvprop=content` output.
            diffto (str): Revision ID to diff each revision to. Use "prev",
                          "next" and "cur" for the previous, next and current
                          revision respectively.

        Returns:
            A list of revisions
        """
        kwargs = {
            'prop': 'revisions',
            'rvprop': prop,
            'revids': '|'.join(map(text_type, revids))
        }
        if expandtemplates:
            kwargs['rvexpandtemplates'] = '1'
        if diffto:
            kwargs['rvdiffto'] = diffto

        revisions = []
        pages = self.get('query', **kwargs).get('query', {}).get('pages', {}).values()
        for page in pages:
            for revision in page.get('revisions', ()):
                revision['pageid'] = page.get('pageid')
                revision['pagetitle'] = page.get('title')
                revision['timestamp'] = parse_timestamp(revision['timestamp'])
                revisions.append(revision)
        return revisions

    def search(self, search, namespace='0', what=None, redirects=False,
               limit=None):
        """Perform a full text search.

        API doc: https://www.mediawiki.org/wiki/API:Search

        Example:
            >>> for result in site.search('prefix:Template:Citation/'):
            ...     print(result.get('title'))

        Args:
            search (str): The query string
            namespace (int): The namespace to search (default: 0)
            what (str): Search scope: 'text' for fulltext, or 'title' for
                titles only. Depending on the search backend, both options
                may not be available. For instance
                `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_
                doesn't support 'title', but instead provides an "intitle:"
                query string filter.
            redirects (bool): Include redirect pages in the search
                (option removed in MediaWiki 1.23).

        Returns:
            mwclient.listings.List: Search results iterator
        """
        kwargs = dict(listing.List.generate_kwargs('sr', search=search,
                                                   namespace=namespace,
                                                   what=what))
        if redirects:
            kwargs['srredirects'] = '1'
        return listing.List(self, 'search', 'sr', limit=limit, **kwargs)

    def usercontributions(self, user, start=None, end=None, dir='older',
                          namespace=None, prop=None, show=None, limit=None):
        """
        List the contributions made by a given user to the wiki, à la
        Special:Contributions.

        API doc: https://www.mediawiki.org/wiki/API:Usercontribs
        """
        kwargs = dict(listing.List.generate_kwargs('uc', user=user,
                                                   start=start, end=end,
                                                   dir=dir,
                                                   namespace=namespace,
                                                   prop=prop, show=show))
        return listing.List(self, 'usercontribs', 'uc', limit=limit, **kwargs)

    def users(self, users, prop='blockinfo|groups|editcount'):
        """
        Get information about a list of users.

        API doc: https://www.mediawiki.org/wiki/API:Users
        """
        return listing.List(self, 'users', 'us', ususers='|'.join(users),
                            usprop=prop)

    def watchlist(self, allrev=False, start=None, end=None, namespace=None,
                  dir='older', prop=None, show=None, limit=None):
        """
        List the pages on the current user's watchlist.

        API doc: https://www.mediawiki.org/wiki/API:Watchlist
        """
        kwargs = dict(listing.List.generate_kwargs('wl', start=start, end=end,
                                                   namespace=namespace,
                                                   dir=dir, prop=prop,
                                                   show=show))
        if allrev:
            kwargs['wlallrev'] = '1'
        return listing.List(self, 'watchlist', 'wl', limit=limit, **kwargs)

    def expandtemplates(self, text, title=None, generatexml=False):
        """
        Takes wikitext (text) and expands templates.

        API doc: https://www.mediawiki.org/wiki/API:Expandtemplates
        """
        kwargs = {}
        # BUGFIX: was `if title is None`, which dropped a supplied title and
        # sent title=None instead.
        if title is not None:
            kwargs['title'] = title
        if generatexml:
            kwargs['generatexml'] = '1'

        result = self.get('expandtemplates', text=text, **kwargs)

        if generatexml:
            return result['expandtemplates']['*'], result['parsetree']['*']
        else:
            return result['expandtemplates']['*']

    def ask(self, query, title=None):
        """
        Ask a query against Semantic MediaWiki.

        API doc: https://semantic-mediawiki.org/wiki/Ask_API

        Returns:
            Generator for retrieving all search results
        """
        kwargs = {}
        # BUGFIX: was `if title is None`, which dropped a supplied title and
        # sent title=None instead.
        if title is not None:
            kwargs['title'] = title

        offset = 0
        while offset is not None:
            # BUGFIX: http_method='GET' was previously passed (and silently
            # ignored) as an extra kwarg to str.format(), so the request went
            # out as POST; it belongs on the raw_api() call.
            results = self.raw_api('ask',
                                   query='{query}|offset={offset}'.format(
                                       query=query, offset=offset),
                                   http_method='GET', **kwargs)
            offset = results.get('query-continue-offset')
            for result in results['query']['results']:
                yield result
class Site(object): """A MediaWiki site identified by its hostname. >>> import mwclient >>> site = mwclient.Site('en.wikipedia.org') Do not include the leading "http://". Mwclient assumes that the script path (where index.php and api.php are located) is '/w/'. If the site uses a different script path, you must specify this (path must end in a '/'). Examples: >>> site = mwclient.Site('vim.wikia.com', path='/') >>> site = mwclient.Site('sourceforge.net', path='/apps/mediawiki/mwclient/') """ api_limit = 500 def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30, max_retries=25, wait_callback=lambda *x: None, clients_useragent=None, max_lag=3, compress=True, force_login=True, do_init=True, httpauth=None, reqs=None, consumer_token=None, consumer_secret=None, access_token=None, access_secret=None, client_certificate=None, custom_headers=None): # Setup member variables self.host = host self.path = path self.ext = ext self.credentials = None self.compress = compress self.max_lag = text_type(max_lag) self.force_login = force_login self.requests = reqs or {} if consumer_token is not None: auth = OAuth1(consumer_token, consumer_secret, access_token, access_secret) elif isinstance(httpauth, (list, tuple)): auth = HTTPBasicAuth(*httpauth) elif httpauth is None or isinstance(httpauth, (AuthBase, )): auth = httpauth else: raise RuntimeError( 'Authentication is not a tuple or an instance of AuthBase') self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback) # Site properties self.blocked = False # Whether current user is blocked self.hasmsg = False # Whether current user has new messages self.groups = [] # Groups current user belongs to self.rights = [] # Rights current user has self.tokens = {} # Edit tokens of the current user self.version = None self.namespaces = self.default_namespaces self.writeapi = False # Setup connection if pool is None: self.connection = requests.Session() self.connection.auth = auth if client_certificate: 
self.connection.cert = client_certificate prefix = '{} - '.format( clients_useragent) if clients_useragent else '' self.connection.headers['User-Agent'] = ( '{prefix}MwClient/{ver} ({url})'.format( prefix=prefix, ver=__ver__, url='https://github.com/mwclient/mwclient')) if custom_headers: self.connection.headers.update(custom_headers) else: self.connection = pool # Page generators self.pages = listing.PageList(self) self.categories = listing.PageList(self, namespace=14) self.images = listing.PageList(self, namespace=6) # Compat page generators self.Pages = self.pages self.Categories = self.categories self.Images = self.images # Initialization status self.initialized = False # Upload chunk size in bytes self.chunk_size = 1048576 if do_init: try: self.site_init() except errors.APIError as e: if e.args[0] == 'mwoauth-invalid-authorization': raise errors.OAuthAuthorizationError(e.code, e.info) # Private wiki, do init after login if e.args[0] not in {u'unknown_action', u'readapidenied'}: raise def site_init(self): if self.initialized: info = self.get('query', meta='userinfo', uiprop='groups|rights') userinfo = info['query']['userinfo'] self.username = userinfo['name'] self.groups = userinfo.get('groups', []) self.rights = userinfo.get('rights', []) self.tokens = {} return meta = self.get('query', meta='siteinfo|userinfo', siprop='general|namespaces', uiprop='groups|rights', retry_on_error=False) # Extract site info self.site = meta['query']['general'] self.namespaces = { namespace['id']: namespace.get('*', '') for namespace in six.itervalues(meta['query']['namespaces']) } self.writeapi = 'writeapi' in self.site self.version = self.version_tuple_from_generator( self.site['generator']) # Require MediaWiki version >= 1.16 self.require(1, 16) # User info userinfo = meta['query']['userinfo'] self.username = userinfo['name'] self.groups = userinfo.get('groups', []) self.rights = userinfo.get('rights', []) self.initialized = True @staticmethod def 
version_tuple_from_generator(string, prefix='MediaWiki '): """Return a version tuple from a MediaWiki Generator string. Example: "MediaWiki 1.5.1" → (1, 5, 1) Args: prefix (str): The expected prefix of the string """ if not string.startswith(prefix): raise errors.MediaWikiVersionError( 'Unknown generator {}'.format(string)) version = string[len(prefix):].split('.') def split_num(s): """Split the string on the first non-digit character. Returns: A tuple of the digit part as int and, if available, the rest of the string. """ i = 0 while i < len(s): if s[i] < '0' or s[i] > '9': break i += 1 if s[i:]: return ( int(s[:i]), s[i:], ) else: return (int(s[:i]), ) version_tuple = sum((split_num(s) for s in version), ()) if len(version_tuple) < 2: raise errors.MediaWikiVersionError('Unknown MediaWiki {}'.format( '.'.join(version))) return version_tuple default_namespaces = { 0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project', 5: u'Project talk', 6: u'Image', 7: u'Image talk', 8: u'MediaWiki', 9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk', 12: u'Help', 13: u'Help talk', 14: u'Category', 15: u'Category talk', -1: u'Special', -2: u'Media' } def __repr__(self): return "<Site object '%s%s'>" % (self.host, self.path) def get(self, action, *args, **kwargs): """Perform a generic API call using GET. This is just a shorthand for calling api() with http_method='GET'. All arguments will be passed on. Returns: The raw response from the API call, as a dictionary. """ return self.api(action, 'GET', *args, **kwargs) def post(self, action, *args, **kwargs): """Perform a generic API call using POST. This is just a shorthand for calling api() with http_method='POST'. All arguments will be passed on. Returns: The raw response from the API call, as a dictionary. """ return self.api(action, 'POST', *args, **kwargs) def api(self, action, http_method='POST', *args, **kwargs): """Perform a generic API call and handle errors. All arguments will be passed on. 
Example: To get coordinates from the GeoData MediaWiki extension at English Wikipedia: >>> site = Site('en.wikipedia.org') >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen') >>> for page in result['query']['pages'].values(): ... if 'coordinates' in page: ... print '{} {} {}'.format(page['title'], ... page['coordinates'][0]['lat'], ... page['coordinates'][0]['lon']) Oslo 59.95 10.75 Copenhagen 55.6761 12.5683 Returns: The raw response from the API call, as a dictionary. """ kwargs.update(args) if action == 'query' and 'continue' not in kwargs: kwargs['continue'] = '' if action == 'query': if 'meta' in kwargs: kwargs['meta'] += '|userinfo' else: kwargs['meta'] = 'userinfo' if 'uiprop' in kwargs: kwargs['uiprop'] += '|blockinfo|hasmsg' else: kwargs['uiprop'] = 'blockinfo|hasmsg' sleeper = self.sleepers.make() while True: info = self.raw_api(action, http_method, **kwargs) if not info: info = {} if self.handle_api_result(info, sleeper=sleeper): return info def handle_api_result(self, info, kwargs=None, sleeper=None): if sleeper is None: sleeper = self.sleepers.make() try: userinfo = info['query']['userinfo'] except KeyError: userinfo = () if 'blockedby' in userinfo: self.blocked = (userinfo['blockedby'], userinfo.get('blockreason', u'')) else: self.blocked = False self.hasmsg = 'messages' in userinfo self.logged_in = 'anon' not in userinfo if 'warnings' in info: for module, warning in info['warnings'].items(): if '*' in warning: log.warning(warning['*']) if 'error' in info: if info['error'].get('code') in { u'internal_api_error_DBConnectionError', u'internal_api_error_DBQueryError' }: sleeper.sleep() return False # cope with https://phabricator.wikimedia.org/T106066 if (info['error'].get('code') == u'mwoauth-invalid-authorization' and 'Nonce already used' in info['error'].get('info')): log.warning( 'retrying due to nonce error https://phabricator.wikimedia.org/T106066' ) sleeper.sleep() return False if 'query' in info['error']: # Semantic 
Mediawiki does not follow the standard error format raise errors.APIError(None, info['error']['query'], kwargs) if '*' in info['error']: raise errors.APIError(info['error']['code'], info['error']['info'], info['error']['*']) raise errors.APIError(info['error']['code'], info['error']['info'], kwargs) return True @staticmethod def _query_string(*args, **kwargs): kwargs.update(args) qs1 = [(k, v) for k, v in six.iteritems(kwargs) if k not in {'wpEditToken', 'token'}] qs2 = [(k, v) for k, v in six.iteritems(kwargs) if k in {'wpEditToken', 'token'}] return OrderedDict(qs1 + qs2) def raw_call(self, script, data, files=None, retry_on_error=True, http_method='POST'): """ Perform a generic request and return the raw text. In the event of a network problem, or a HTTP response with status code 5XX, we'll wait and retry the configured number of times before giving up if `retry_on_error` is True. `requests.exceptions.HTTPError` is still raised directly for HTTP responses with status codes in the 4XX range, and invalid HTTP responses. Args: script (str): Script name, usually 'api'. data (dict): Post data files (dict): Files to upload retry_on_error (bool): Retry on connection error Returns: The raw text response. """ headers = {} if self.compress and gzip: headers['Accept-Encoding'] = 'gzip' sleeper = self.sleepers.make((script, data)) scheme = 'https' host = self.host if isinstance(host, (list, tuple)): scheme, host = host url = '{scheme}://{host}{path}{script}{ext}'.format(scheme=scheme, host=host, path=self.path, script=script, ext=self.ext) while True: try: if http_method == 'GET': stream = self.connection.get(url, params=data, files=files, headers=headers, **self.requests) else: stream = self.connection.post(url, data=data, files=files, headers=headers, **self.requests) if stream.headers.get('x-database-lag'): wait_time = int(stream.headers.get('retry-after')) log.warning('Database lag exceeds max lag. 
' 'Waiting for {} seconds'.format(wait_time)) sleeper.sleep(wait_time) elif stream.status_code == 200: return stream.text elif stream.status_code < 500 or stream.status_code > 599: stream.raise_for_status() else: if not retry_on_error: stream.raise_for_status() log.warning('Received {status} response: {text}. ' 'Retrying in a moment.'.format( status=stream.status_code, text=stream.text)) sleeper.sleep() except requests.exceptions.ConnectionError: # In the event of a network problem # (e.g. DNS failure, refused connection, etc), # Requests will raise a ConnectionError exception. if not retry_on_error: raise log.warning('Connection error. Retrying in a moment.') sleeper.sleep() def raw_api(self, action, http_method='POST', *args, **kwargs): """Send a call to the API.""" try: retry_on_error = kwargs.pop('retry_on_error') except KeyError: retry_on_error = True kwargs['action'] = action kwargs['format'] = 'json' data = self._query_string(*args, **kwargs) res = self.raw_call('api', data, retry_on_error=retry_on_error, http_method=http_method) try: return json.loads(res, object_pairs_hook=OrderedDict) except ValueError: if res.startswith('MediaWiki API is not enabled for this site.'): raise errors.APIDisabledError raise errors.InvalidResponse(res) def raw_index(self, action, http_method='POST', *args, **kwargs): """Sends a call to index.php rather than the API.""" kwargs['action'] = action kwargs['maxlag'] = self.max_lag data = self._query_string(*args, **kwargs) return self.raw_call('index', data, http_method=http_method) def require(self, major, minor, revision=None, raise_error=True): if self.version is None: if raise_error is None: return raise RuntimeError('Site %s has not yet been initialized' % repr(self)) if revision is None: if self.version[:2] >= (major, minor): return True elif raise_error: raise errors.MediaWikiVersionError( 'Requires version {required[0]}.{required[1]}, ' 'current version is {current[0]}.{current[1]}'.format( required=(major, minor), 
current=(self.version[:2]))) else: return False else: raise NotImplementedError # Actions def email(self, user, text, subject, cc=False): """ Send email to a specified user on the wiki. >>> try: ... site.email('SomeUser', 'Some message', 'Some subject') ... except mwclient.errors.NoSpecifiedEmailError as e: ... print 'The user does not accept email, or has not specified an email address.' Args: user (str): User name of the recipient text (str): Body of the email subject (str): Subject of the email cc (bool): True to send a copy of the email to yourself (default is False) Returns: Dictionary of the JSON response Raises: NoSpecifiedEmailError (mwclient.errors.NoSpecifiedEmailError): if recipient does not accept email EmailError (mwclient.errors.EmailError): on other errors """ token = self.get_token('email') try: info = self.post('emailuser', target=user, subject=subject, text=text, ccme=cc, token=token) except errors.APIError as e: if e.args[0] == u'noemail': raise errors.NoSpecifiedEmail(user, e.args[1]) raise errors.EmailError(*e) return info def login(self, username=None, password=None, cookies=None, domain=None): """Login to the wiki.""" if username and password: self.credentials = (username, password, domain) if cookies: self.connection.cookies.update(cookies) if self.credentials: sleeper = self.sleepers.make() kwargs = { 'lgname': self.credentials[0], 'lgpassword': self.credentials[1] } if self.credentials[2]: kwargs['lgdomain'] = self.credentials[2] # Try to login using the scheme for MW 1.27+. If the wiki is read protected, # it is not possible to get the wiki version upfront using the API, so we just # have to try. If the attempt fails, we try the old method. 
try: kwargs['lgtoken'] = self.get_token('login') except (errors.APIError, KeyError): log.debug( 'Failed to get login token, MediaWiki is older than 1.27.') while True: login = self.post('login', **kwargs) if login['login']['result'] == 'Success': break elif login['login']['result'] == 'NeedToken': kwargs['lgtoken'] = login['login']['token'] elif login['login']['result'] == 'Throttled': sleeper.sleep(int(login['login'].get('wait', 5))) else: raise errors.LoginError(self, login['login']) self.site_init() def get_token(self, type, force=False, title=None): if self.version is None or self.version[:2] >= (1, 24): # The 'csrf' (cross-site request forgery) token introduced in 1.24 replaces # the majority of older tokens, like edittoken and movetoken. if type not in { 'watch', 'patrol', 'rollback', 'userrights', 'login' }: type = 'csrf' if type not in self.tokens: self.tokens[type] = '0' if self.tokens.get(type, '0') == '0' or force: if self.version is None or self.version[:2] >= (1, 24): # We use raw_api() rather than api() because api() is adding "userinfo" # to the query and this raises an readapideniederror if the wiki is read # protected and we're trying to fetch a login token. info = self.raw_api('query', 'GET', meta='tokens', type=type) self.handle_api_result(info) # Note that for read protected wikis, we don't know the version when # fetching the login token. If it's < 1.27, the request below will # raise a KeyError that we should catch. self.tokens[type] = info['query']['tokens']['%stoken' % type] else: if title is None: # Some dummy title was needed to get a token prior to 1.24 title = 'Test' info = self.post('query', titles=title, prop='info', intoken=type) for i in six.itervalues(info['query']['pages']): if i['title'] == title: self.tokens[type] = i['%stoken' % type] return self.tokens[type] def upload(self, file=None, filename=None, description='', ignore=False, file_size=None, url=None, filekey=None, comment=None): """Upload a file to the site. 
Note that one of `file`, `filekey` and `url` must be specified, but not more than one. For normal uploads, you specify `file`. Args: file (str): File object or stream to upload. filename (str): Destination filename, don't include namespace prefix like 'File:' description (str): Wikitext for the file description page. ignore (bool): True to upload despite any warnings. file_size (int): Deprecated in mwclient 0.7 url (str): URL to fetch the file from. filekey (str): Key that identifies a previous upload that was stashed temporarily. comment (str): Upload comment. Also used as the initial page text for new files if `description` is not specified. Example: >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg', description='Some description') Returns: JSON result from the API. Raises: errors.InsufficientPermission requests.exceptions.HTTPError """ if file_size is not None: # Note that DeprecationWarning is hidden by default since Python 2.7 warnings.warn('file_size is deprecated since mwclient 0.7', DeprecationWarning) if filename is None: raise TypeError('filename must be specified') if len([x for x in [file, filekey, url] if x is not None]) != 1: raise TypeError( "exactly one of 'file', 'filekey' and 'url' must be specified") image = self.Images[filename] if not image.can('upload'): raise errors.InsufficientPermission(filename) if comment is None: comment = description text = None else: comment = comment text = description if file is not None: if not hasattr(file, 'read'): file = open(file, 'rb') content_size = file.seek(0, 2) file.seek(0) if self.version[:2] >= (1, 20) and content_size > self.chunk_size: return self.chunk_upload(file, filename, ignore, comment, text) predata = { 'action': 'upload', 'format': 'json', 'filename': filename, 'comment': comment, 'text': text, 'token': image.get_token('edit'), } if ignore: predata['ignorewarnings'] = 'true' if url: predata['url'] = url # sessionkey was renamed to filekey in MediaWiki 1.18 # 
https://phabricator.wikimedia.org/rMW5f13517e36b45342f228f3de4298bb0fe186995d if self.version[:2] < (1, 18): predata['sessionkey'] = filekey else: predata['filekey'] = filekey postdata = predata files = None if file is not None: # Workaround for https://github.com/mwclient/mwclient/issues/65 # ---------------------------------------------------------------- # Since the filename in Content-Disposition is not interpreted, # we can send some ascii-only dummy name rather than the real # filename, which might contain non-ascii. files = {'file': ('fake-filename', file)} sleeper = self.sleepers.make() while True: data = self.raw_call('api', postdata, files) info = json.loads(data) if not info: info = {} if self.handle_api_result(info, kwargs=predata, sleeper=sleeper): response = info.get('upload', {}) break if file is not None: file.close() return response def chunk_upload(self, file, filename, ignorewarnings, comment, text): """Upload a file to the site in chunks. This method is called by `Site.upload` if you are connecting to a newer MediaWiki installation, so it's normally not necessary to call this method directly. Args: file (file-like object): File object or stream to upload. params (dict): Dict containing upload parameters. 
""" image = self.Images[filename] content_size = file.seek(0, 2) file.seek(0) params = { 'action': 'upload', 'format': 'json', 'stash': 1, 'offset': 0, 'filename': filename, 'filesize': content_size, 'token': image.get_token('edit'), } if ignorewarnings: params['ignorewarnings'] = 'true' sleeper = self.sleepers.make() offset = 0 for chunk in read_in_chunks(file, self.chunk_size): while True: data = self.raw_call('api', params, files={'chunk': chunk}) info = json.loads(data) if self.handle_api_result(info, kwargs=params, sleeper=sleeper): response = info.get('upload', {}) break offset += chunk.tell() chunk.close() log.debug('%s: Uploaded %d of %d bytes', filename, offset, content_size) params['filekey'] = response['filekey'] if response['result'] == 'Continue': params['offset'] = response['offset'] elif response['result'] == 'Success': file.close() break else: # Some kind or error or warning occured. In any case, we do not # get the parameters we need to continue, so we should return # the response now. file.close() return response del params['action'] del params['stash'] del params['offset'] params['comment'] = comment params['text'] = text return self.post('upload', **params) def parse(self, text=None, title=None, page=None, prop=None, redirects=False, mobileformat=False): kwargs = {} if text is not None: kwargs['text'] = text if title is not None: kwargs['title'] = title if page is not None: kwargs['page'] = page if prop is not None: kwargs['prop'] = prop if redirects: kwargs['redirects'] = '1' if mobileformat: kwargs['mobileformat'] = '1' result = self.post('parse', **kwargs) return result['parse'] # def block(self): TODO? # def unblock: TODO? # def patrol: TODO? # def import: TODO? 
# Lists def allpages(self, start=None, prefix=None, namespace='0', filterredir='all', minsize=None, maxsize=None, prtype=None, prlevel=None, limit=None, dir='ascending', filterlanglinks='all', generator=True, end=None): """Retrieve all pages on the wiki as a generator.""" pfx = listing.List.get_prefix('ap', generator) kwargs = dict( listing.List.generate_kwargs( pfx, ('from', start), ('to', end), prefix=prefix, minsize=minsize, maxsize=maxsize, prtype=prtype, prlevel=prlevel, namespace=namespace, filterredir=filterredir, dir=dir, filterlanglinks=filterlanglinks, )) return listing.List.get_list(generator)(self, 'allpages', 'ap', limit=limit, return_values='title', **kwargs) def allimages(self, start=None, prefix=None, minsize=None, maxsize=None, limit=None, dir='ascending', sha1=None, sha1base36=None, generator=True, end=None): """Retrieve all images on the wiki as a generator.""" pfx = listing.List.get_prefix('ai', generator) kwargs = dict( listing.List.generate_kwargs( pfx, ('from', start), ('to', end), prefix=prefix, minsize=minsize, maxsize=maxsize, dir=dir, sha1=sha1, sha1base36=sha1base36, )) return listing.List.get_list(generator)(self, 'allimages', 'ai', limit=limit, return_values='timestamp|url', **kwargs) def alllinks(self, start=None, prefix=None, unique=False, prop='title', namespace='0', limit=None, generator=True, end=None): """Retrieve a list of all links on the wiki as a generator.""" pfx = listing.List.get_prefix('al', generator) kwargs = dict( listing.List.generate_kwargs(pfx, ('from', start), ('to', end), prefix=prefix, prop=prop, namespace=namespace)) if unique: kwargs[pfx + 'unique'] = '1' return listing.List.get_list(generator)(self, 'alllinks', 'al', limit=limit, return_values='title', **kwargs) def allcategories(self, start=None, prefix=None, dir='ascending', limit=None, generator=True, end=None): """Retrieve all categories on the wiki as a generator.""" pfx = listing.List.get_prefix('ac', generator) kwargs = dict( 
listing.List.generate_kwargs(pfx, ('from', start), ('to', end), prefix=prefix, dir=dir)) return listing.List.get_list(generator)(self, 'allcategories', 'ac', limit=limit, **kwargs) def allusers(self, start=None, prefix=None, group=None, prop=None, limit=None, witheditsonly=False, activeusers=False, rights=None, end=None): """Retrieve all users on the wiki as a generator.""" kwargs = dict( listing.List.generate_kwargs('au', ('from', start), ('to', end), prefix=prefix, group=group, prop=prop, rights=rights, witheditsonly=witheditsonly, activeusers=activeusers)) return listing.List(self, 'allusers', 'au', limit=limit, **kwargs) def blocks(self, start=None, end=None, dir='older', ids=None, users=None, limit=None, prop='id|user|by|timestamp|expiry|reason|flags'): """Retrieve blocks as a generator. Each block is a dictionary containing: - user: the username or IP address of the user - id: the ID of the block - timestamp: when the block was added - expiry: when the block runs out (infinity for indefinite blocks) - reason: the reason they are blocked - allowusertalk: key is present (empty string) if the user is allowed to edit their user talk page - by: the administrator who blocked the user - nocreate: key is present (empty string) if the user's ability to create accounts has been disabled. """ # TODO: Fix. Fix what? kwargs = dict( listing.List.generate_kwargs('bk', start=start, end=end, dir=dir, ids=ids, users=users, prop=prop)) return listing.List(self, 'blocks', 'bk', limit=limit, **kwargs) def deletedrevisions(self, start=None, end=None, dir='older', namespace=None, limit=None, prop='user|comment'): # TODO: Fix kwargs = dict( listing.List.generate_kwargs('dr', start=start, end=end, dir=dir, namespace=namespace, prop=prop)) return listing.List(self, 'deletedrevs', 'dr', limit=limit, **kwargs) def exturlusage(self, query, prop=None, protocol='http', namespace=None, limit=None): r"""Retrieve the list of pages that link to a particular domain or URL, as a generator. 
This API call mirrors the Special:LinkSearch function on-wiki. Query can be a domain like 'bbc.co.uk'. Wildcards can be used, e.g. '\*.bbc.co.uk'. Alternatively, a query can contain a full domain name and some or all of a URL: e.g. '\*.wikipedia.org/wiki/\*' See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details. The generator returns dictionaries containing three keys: - url: the URL linked to. - ns: namespace of the wiki page - pageid: the ID of the wiki page - title: the page title. """ kwargs = dict( listing.List.generate_kwargs('eu', query=query, prop=prop, protocol=protocol, namespace=namespace)) return listing.List(self, 'exturlusage', 'eu', limit=limit, **kwargs) def logevents(self, type=None, prop=None, start=None, end=None, dir='older', user=None, title=None, limit=None, action=None): """Retrieve logevents as a generator.""" kwargs = dict( listing.List.generate_kwargs('le', prop=prop, type=type, start=start, end=end, dir=dir, user=user, title=title, action=action)) return listing.List(self, 'logevents', 'le', limit=limit, **kwargs) def checkuserlog(self, user=None, target=None, limit=10, dir='older', start=None, end=None): """Retrieve checkuserlog items as a generator.""" kwargs = dict( listing.List.generate_kwargs('cul', target=target, start=start, end=end, dir=dir, user=user)) return listing.NestedList('entries', self, 'checkuserlog', 'cul', limit=limit, **kwargs) # def protectedtitles requires 1.15 def random(self, namespace, limit=20): """Retrieve a generator of random pages from a particular namespace. limit specifies the number of random articles retrieved. namespace is a namespace identifier integer. Generator contains dictionary with namespace, page ID and title. 
""" kwargs = dict(listing.List.generate_kwargs('rn', namespace=namespace)) return listing.List(self, 'random', 'rn', limit=limit, **kwargs) def recentchanges(self, start=None, end=None, dir='older', namespace=None, prop=None, show=None, limit=None, type=None, toponly=None): """List recent changes to the wiki, à la Special:Recentchanges. """ kwargs = dict( listing.List.generate_kwargs('rc', start=start, end=end, dir=dir, namespace=namespace, prop=prop, show=show, type=type, toponly='1' if toponly else None)) return listing.List(self, 'recentchanges', 'rc', limit=limit, **kwargs) def revisions(self, revids, prop='ids|timestamp|flags|comment|user', expandtemplates=False, diffto='prev'): """Get data about a list of revisions. See also the `Page.revisions()` method. API doc: https://www.mediawiki.org/wiki/API:Revisions Example: Get revision text for two revisions: >>> for revision in site.revisions([689697696, 689816909], prop='content'): ... print revision['*'] Args: revids (list): A list of (max 50) revisions. prop (str): Which properties to get for each revision. expandtemplates (bool): Expand templates in `rvprop=content` output. diffto (str): Revision ID to diff each revision to. Use "prev", "next" and "cur" for the previous, next and current revision respectively. Returns: A list of revisions """ kwargs = { 'prop': 'revisions', 'rvprop': prop, 'revids': '|'.join(map(text_type, revids)) } if expandtemplates: kwargs['rvexpandtemplates'] = '1' if diffto: kwargs['rvdiffto'] = diffto revisions = [] pages = self.get('query', **kwargs).get('query', {}).get('pages', {}).values() for page in pages: for revision in page.get('revisions', ()): revision['pageid'] = page.get('pageid') revision['pagetitle'] = page.get('title') revision['timestamp'] = parse_timestamp(revision['timestamp']) revisions.append(revision) return revisions def search(self, search, namespace='0', what=None, redirects=False, limit=None): """Perform a full text search. 
API doc: https://www.mediawiki.org/wiki/API:Search Example: >>> for result in site.search('prefix:Template:Citation/'): ... print(result.get('title')) Args: search (str): The query string namespace (int): The namespace to search (default: 0) what (str): Search scope: 'text' for fulltext, or 'title' for titles only. Depending on the search backend, both options may not be available. For instance `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_ doesn't support 'title', but instead provides an "intitle:" query string filter. redirects (bool): Include redirect pages in the search (option removed in MediaWiki 1.23). Returns: mwclient.listings.List: Search results iterator """ kwargs = dict( listing.List.generate_kwargs('sr', search=search, namespace=namespace, what=what)) if redirects: kwargs['srredirects'] = '1' return listing.List(self, 'search', 'sr', limit=limit, **kwargs) def usercontributions(self, user, start=None, end=None, dir='older', namespace=None, prop=None, show=None, limit=None): """ List the contributions made by a given user to the wiki, à la Special:Contributions. API doc: https://www.mediawiki.org/wiki/API:Usercontribs """ kwargs = dict( listing.List.generate_kwargs('uc', user=user, start=start, end=end, dir=dir, namespace=namespace, prop=prop, show=show)) return listing.List(self, 'usercontribs', 'uc', limit=limit, **kwargs) def users(self, users, prop='blockinfo|groups|editcount'): """ Get information about a list of users. API doc: https://www.mediawiki.org/wiki/API:Users """ return listing.List(self, 'users', 'us', ususers='|'.join(users), usprop=prop) def watchlist(self, allrev=False, start=None, end=None, namespace=None, dir='older', prop=None, show=None, limit=None): """ List the pages on the current user's watchlist. 
API doc: https://www.mediawiki.org/wiki/API:Watchlist """ kwargs = dict( listing.List.generate_kwargs('wl', start=start, end=end, namespace=namespace, dir=dir, prop=prop, show=show)) if allrev: kwargs['wlallrev'] = '1' return listing.List(self, 'watchlist', 'wl', limit=limit, **kwargs) def expandtemplates(self, text, title=None, generatexml=False): """ Takes wikitext (text) and expands templates. API doc: https://www.mediawiki.org/wiki/API:Expandtemplates """ kwargs = {} if title is None: kwargs['title'] = title if generatexml: kwargs['generatexml'] = '1' result = self.get('expandtemplates', text=text, **kwargs) if generatexml: return result['expandtemplates']['*'], result['parsetree']['*'] else: return result['expandtemplates']['*'] def ask(self, query, title=None): """ Ask a query against Semantic MediaWiki. API doc: https://semantic-mediawiki.org/wiki/Ask_API Returns: Generator for retrieving all search results, with each answer as a dictionary. If the query is invalid, an APIError is raised. A valid query with zero results will not raise any error. Examples: >>> query = "[[Category:my cat]]|[[Has name::a name]]|?Has property" >>> for answer in site.ask(query): >>> for title, data in answer.items() >>> print(title) >>> print(data) """ kwargs = {} if title is None: kwargs['title'] = title offset = 0 while offset is not None: results = self.raw_api('ask', query=u'{query}|offset={offset}'.format( query=query, offset=offset), http_method='GET', **kwargs) self.handle_api_result(results) # raises APIError on error offset = results.get('query-continue-offset') answers = results['query'].get('results') or {} for key, value in answers.items(): yield {key: value}
class Site(object):
    """A MediaWiki site identified by its hostname.

    Holds connection state (a ``requests`` session or a caller-supplied pool),
    the current user's rights/groups/tokens, and exposes the MediaWiki API
    through generic (``api``, ``raw_api``, ``raw_call``) and specific
    (``search``, ``upload``, ``login``, ...) methods.
    """

    # Default number of results requested per API call by the listing helpers.
    api_limit = 500

    def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30,
                 max_retries=25, wait_callback=lambda *x: None,
                 clients_useragent=None, max_lag=3, compress=True,
                 force_login=True, do_init=True, httpauth=None):
        # Setup member variables
        self.host = host
        self.path = path
        self.ext = ext
        self.credentials = None
        self.compress = compress
        self.max_lag = text_type(max_lag)
        self.force_login = force_login

        # httpauth may be a (user, password) pair or a requests AuthBase.
        if isinstance(httpauth, (list, tuple)):
            self.httpauth = HTTPBasicAuth(*httpauth)
        elif httpauth is None or isinstance(httpauth, (AuthBase,)):
            self.httpauth = httpauth
        else:
            raise RuntimeError('Authentication is not a tuple or an instance of AuthBase')

        self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback)

        # Site properties
        self.blocked = False    # Whether current user is blocked
        self.hasmsg = False     # Whether current user has new messages
        self.groups = []        # Groups current user belongs to
        self.rights = []        # Rights current user has
        self.tokens = {}        # Edit tokens of the current user
        self.version = None

        self.namespaces = self.default_namespaces
        self.writeapi = False

        # Setup connection
        if pool is None:
            self.connection = requests.Session()
            self.connection.auth = self.httpauth
            self.connection.headers['User-Agent'] = \
                'MwClient/' + __ver__ + ' (https://github.com/mwclient/mwclient)'
            if clients_useragent:
                self.connection.headers['User-Agent'] = \
                    clients_useragent + ' - ' + self.connection.headers['User-Agent']
        else:
            self.connection = pool

        # Page generators
        self.pages = listing.PageList(self)
        self.categories = listing.PageList(self, namespace=14)
        self.images = listing.PageList(self, namespace=6)

        # Compat page generators
        self.Pages = self.pages
        self.Categories = self.categories
        self.Images = self.images

        # Initialization status
        self.initialized = False

        if do_init:
            try:
                self.site_init()
            except errors.APIError as e:
                # Private wiki, do init after login
                if e.args[0] not in (u'unknown_action', u'readapidenied'):
                    raise

    def site_init(self):
        """Fetch site metadata (siteinfo/userinfo) and populate this object."""
        meta = self.api('query', meta='siteinfo|userinfo',
                        siprop='general|namespaces', uiprop='groups|rights',
                        retry_on_error=False)

        # Extract site info
        self.site = meta['query']['general']
        self.namespaces = dict(
            ((i['id'], i.get('*', ''))
             for i in six.itervalues(meta['query']['namespaces'])))
        self.writeapi = 'writeapi' in self.site

        # Determine version from the 'generator' string, e.g.
        # 'MediaWiki 1.24wmf3' -> (1, 24, 'wmf3')
        if self.site['generator'].startswith('MediaWiki '):
            version = self.site['generator'][10:].split('.')

            def split_num(s):
                # Split a leading run of digits from any trailing suffix.
                # NOTE(review): assumes each component starts with at least one
                # digit; int('') would raise otherwise.
                i = 0
                while i < len(s):
                    if s[i] < '0' or s[i] > '9':
                        break
                    i += 1
                if s[i:]:
                    return (int(s[:i]), s[i:], )
                else:
                    return (int(s[:i]), )

            self.version = sum((split_num(s) for s in version), ())

            if len(self.version) < 2:
                raise errors.MediaWikiVersionError(
                    'Unknown MediaWiki %s' % '.'.join(version))
        else:
            raise errors.MediaWikiVersionError(
                'Unknown generator %s' % self.site['generator'])

        # Require MediaWiki version >= 1.16
        self.require(1, 16)

        # User info
        userinfo = meta['query']['userinfo']
        self.username = userinfo['name']
        self.groups = userinfo.get('groups', [])
        self.rights = userinfo.get('rights', [])
        self.initialized = True

    # Fallback namespace map used until site_init() fetches the real one.
    default_namespaces = {
        0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project',
        5: u'Project talk', 6: u'Image', 7: u'Image talk', 8: u'MediaWiki',
        9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk',
        12: u'Help', 13: u'Help talk', 14: u'Category', 15: u'Category talk',
        -1: u'Special', -2: u'Media',
    }

    def __repr__(self):
        return "<Site object '%s%s'>" % (self.host, self.path)

    def api(self, action, *args, **kwargs):
        """
        Perform a generic API call and handle errors. All arguments will be
        passed on.

        Example:
            To get coordinates from the GeoData MediaWiki extension at English
            Wikipedia:

            >>> site = Site('en.wikipedia.org')
            >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen')
            >>> for page in result['query']['pages'].values():
            ...     if 'coordinates' in page:
            ...         print page['title'], page['coordinates'][0]['lat'], page['coordinates'][0]['lon']
            Oslo 59.95 10.75
            Copenhagen 55.6761 12.5683

        Returns:
            The raw response from the API call, as a dictionary.
        """
        kwargs.update(args)
        if 'continue' not in kwargs:
            kwargs['continue'] = ''
        if action == 'query':
            # Always piggy-back userinfo on queries so handle_api_result can
            # keep the blocked/hasmsg/logged_in state up to date.
            if 'meta' in kwargs:
                kwargs['meta'] += '|userinfo'
            else:
                kwargs['meta'] = 'userinfo'
            if 'uiprop' in kwargs:
                kwargs['uiprop'] += '|blockinfo|hasmsg'
            else:
                kwargs['uiprop'] = 'blockinfo|hasmsg'

        sleeper = self.sleepers.make()

        while True:
            info = self.raw_api(action, **kwargs)
            if not info:
                info = {}
            if self.handle_api_result(info, sleeper=sleeper):
                return info

    def handle_api_result(self, info, kwargs=None, sleeper=None):
        """Inspect an API response dict; update user state and raise on errors.

        Returns True when the result is final, False when the call should be
        retried (transient DB errors). Raises errors.APIError otherwise.
        """
        if sleeper is None:
            sleeper = self.sleepers.make()

        try:
            userinfo = info['query']['userinfo']
        except KeyError:
            userinfo = ()
        if 'blockedby' in userinfo:
            self.blocked = (userinfo['blockedby'],
                            userinfo.get('blockreason', u''))
        else:
            self.blocked = False
        self.hasmsg = 'messages' in userinfo
        self.logged_in = 'anon' not in userinfo
        if 'error' in info:
            # Transient database errors are retried after a sleep.
            if info['error']['code'] in (u'internal_api_error_DBConnectionError',
                                         u'internal_api_error_DBQueryError'):
                sleeper.sleep()
                return False
            if '*' in info['error']:
                raise errors.APIError(info['error']['code'],
                                      info['error']['info'],
                                      info['error']['*'])
            raise errors.APIError(info['error']['code'],
                                  info['error']['info'], kwargs)
        return True

    @staticmethod
    def _query_string(*args, **kwargs):
        """Build an ordered mapping with token parameters moved to the end."""
        kwargs.update(args)
        qs1 = [(k, v) for k, v in six.iteritems(kwargs)
               if k not in ('wpEditToken', 'token')]
        qs2 = [(k, v) for k, v in six.iteritems(kwargs)
               if k in ('wpEditToken', 'token')]
        return OrderedDict(qs1 + qs2)

    def raw_call(self, script, data, files=None, retry_on_error=True):
        """
        Perform a generic API call and return the raw text.

        In the event of a network problem, or a HTTP response with status code
        5XX, we'll wait and retry the configured number of times before giving
        up if `retry_on_error` is True.

        `requests.exceptions.HTTPError` is still raised directly for
        HTTP responses with status codes in the 4XX range, and invalid
        HTTP responses.

        Args:
            script (str): Script name, usually 'api'.
            data (dict): Post data
            files (dict): Files to upload
            retry_on_error (bool): Retry on connection error

        Returns:
            The raw text response.
        """
        url = self.path + script + self.ext
        headers = {}
        if self.compress and gzip:
            headers['Accept-Encoding'] = 'gzip'
        sleeper = self.sleepers.make((script, data))

        while True:
            scheme = 'http'  # Should we move to 'https' as default?
            host = self.host
            if isinstance(host, (list, tuple)):
                scheme, host = host

            fullurl = '{scheme}://{host}{url}'.format(scheme=scheme,
                                                      host=host, url=url)

            try:
                stream = self.connection.post(fullurl, data=data, files=files,
                                              headers=headers)
                if stream.headers.get('x-database-lag'):
                    wait_time = int(stream.headers.get('retry-after'))
                    log.warning('Database lag exceeds max lag. '
                                'Waiting for %d seconds', wait_time)
                    sleeper.sleep(wait_time)
                elif stream.status_code == 200:
                    return stream.text
                elif stream.status_code < 500 or stream.status_code > 599:
                    # 4XX and other non-5XX failures are raised immediately.
                    stream.raise_for_status()
                else:
                    if not retry_on_error:
                        stream.raise_for_status()
                    log.warning('Received %s response: %s. '
                                'Retrying in a moment.',
                                stream.status_code, stream.text)
                    sleeper.sleep()
            except requests.exceptions.ConnectionError:
                # In the event of a network problem (e.g. DNS failure,
                # refused connection, etc), Requests will raise a
                # ConnectionError exception.
                if not retry_on_error:
                    raise
                log.warning('Connection error. Retrying in a moment.')
                sleeper.sleep()

    def raw_api(self, action, *args, **kwargs):
        """Sends a call to the API."""
        try:
            retry_on_error = kwargs.pop('retry_on_error')
        except KeyError:
            retry_on_error = True
        kwargs['action'] = action
        kwargs['format'] = 'json'
        data = self._query_string(*args, **kwargs)
        res = self.raw_call('api', data, retry_on_error=retry_on_error)

        try:
            return json.loads(res)
        except ValueError:
            if res.startswith('MediaWiki API is not enabled for this site.'):
                raise errors.APIDisabledError
            raise errors.InvalidResponse(res)

    def raw_index(self, action, *args, **kwargs):
        """Sends a call to index.php rather than the API."""
        kwargs['action'] = action
        kwargs['maxlag'] = self.max_lag
        data = self._query_string(*args, **kwargs)
        return self.raw_call('index', data)

    def require(self, major, minor, revision=None, raise_error=True):
        """Check that the remote MediaWiki version is at least major.minor.

        Returns True/False when raise_error is falsy; raises
        errors.MediaWikiVersionError otherwise.
        """
        if self.version is None:
            if raise_error is None:
                return
            raise RuntimeError('Site %s has not yet been initialized' % repr(self))

        if revision is None:
            if self.version[:2] >= (major, minor):
                return True
            elif raise_error:
                raise errors.MediaWikiVersionError(
                    'Requires version %s.%s, current version is %s.%s'
                    % ((major, minor) + self.version[:2]))
            else:
                return False
        else:
            raise NotImplementedError

    # Actions
    def email(self, user, text, subject, cc=False):
        """
        Send email to a specified user on the wiki.

        >>> try:
        ...     site.email('SomeUser', 'Some message', 'Some subject')
        ... except mwclient.errors.NoSpecifiedEmailError as e:
        ...     print 'The user does not accept email, or has not specified an email address.'

        Args:
            user (str): User name of the recipient
            text (str): Body of the email
            subject (str): Subject of the email
            cc (bool): True to send a copy of the email to yourself
                (default is False)

        Returns:
            Dictionary of the JSON response

        Raises:
            NoSpecifiedEmailError (mwclient.errors.NoSpecifiedEmailError):
                if recipient does not accept email
            EmailError (mwclient.errors.EmailError): on other errors
        """
        token = self.get_token('email')

        try:
            info = self.api('emailuser', target=user, subject=subject,
                            text=text, ccme=cc, token=token)
        except errors.APIError as e:
            if e.args[0] == u'noemail':
                raise errors.NoSpecifiedEmail(user, e.args[1])
            # BUG FIX: was `errors.EmailError(*e)`; unpacking an exception
            # instance fails on Python 3 (exceptions are not iterable).
            # `*e.args` is equivalent on Python 2.
            raise errors.EmailError(*e.args)

        return info

    def login(self, username=None, password=None, cookies=None, domain=None):
        """Login to the wiki."""
        if username and password:
            self.credentials = (username, password, domain)
        if cookies:
            self.connection.cookies.update(cookies)

        if self.credentials:
            sleeper = self.sleepers.make()
            kwargs = {
                'lgname': self.credentials[0],
                'lgpassword': self.credentials[1]
            }
            if self.credentials[2]:
                kwargs['lgdomain'] = self.credentials[2]
            while True:
                login = self.api('login', **kwargs)
                if login['login']['result'] == 'Success':
                    break
                elif login['login']['result'] == 'NeedToken':
                    kwargs['lgtoken'] = login['login']['token']
                elif login['login']['result'] == 'Throttled':
                    sleeper.sleep(int(login['login'].get('wait', 5)))
                else:
                    raise errors.LoginError(self, login['login'])

        if self.initialized:
            # Refresh user info and invalidate cached tokens.
            info = self.api('query', meta='userinfo', uiprop='groups|rights')
            userinfo = info['query']['userinfo']
            self.username = userinfo['name']
            self.groups = userinfo.get('groups', [])
            self.rights = userinfo.get('rights', [])
            self.tokens = {}
        else:
            self.site_init()

    def get_token(self, type, force=False, title=None):
        """Fetch (and cache) an API token of the given type."""
        if self.version[:2] >= (1, 24):
            # The 'csrf' (cross-site request forgery) token introduced in 1.24
            # replaces the majority of older tokens, like edittoken and
            # movetoken.
            if type not in ['watch', 'patrol', 'rollback', 'userrights']:
                type = 'csrf'

        if type not in self.tokens:
            self.tokens[type] = '0'

        if self.tokens.get(type, '0') == '0' or force:
            if self.version[:2] >= (1, 24):
                info = self.api('query', meta='tokens', type=type)
                self.tokens[type] = info['query']['tokens']['%stoken' % type]
            else:
                if title is None:
                    # Some dummy title was needed to get a token prior to 1.24
                    title = 'Test'
                info = self.api('query', titles=title,
                                prop='info', intoken=type)
                for i in six.itervalues(info['query']['pages']):
                    if i['title'] == title:
                        self.tokens[type] = i['%stoken' % type]

        return self.tokens[type]

    def upload(self, file=None, filename=None, description='', ignore=False,
               file_size=None, url=None, filekey=None, comment=None):
        """
        Uploads a file to the site. Returns JSON result from the API.
        Can raise `errors.InsufficientPermission` and
        `requests.exceptions.HTTPError`.

        : Parameters :
          - file : File object or stream to upload.
          - filename : Destination filename, don't include namespace
                       prefix like 'File:'
          - description : Wikitext for the file description page.
          - ignore : True to upload despite any warnings.
          - file_size : Deprecated in mwclient 0.7
          - url : URL to fetch the file from.
          - filekey : Key that identifies a previous upload that was
                      stashed temporarily.
          - comment : Upload comment. Also used as the initial page text
                      for new files if `description` is not specified.

        Note that one of `file`, `filekey` and `url` must be specified, but
        not more than one. For normal uploads, you specify `file`.

        Example:

        >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg',
                          description='Some description')
        """
        if file_size is not None:
            # Note that DeprecationWarning is hidden by default since
            # Python 2.7
            warnings.warn(
                'file_size is deprecated since mwclient 0.7',
                DeprecationWarning
            )
            file_size = None

        if filename is None:
            raise TypeError('filename must be specified')

        if len([x for x in [file, filekey, url] if x is not None]) != 1:
            raise TypeError("exactly one of 'file', 'filekey' and 'url' must be specified")

        image = self.Images[filename]
        if not image.can('upload'):
            raise errors.InsufficientPermission(filename)

        predata = {}

        if comment is None:
            predata['comment'] = description
        else:
            predata['comment'] = comment
            predata['text'] = description

        if ignore:
            predata['ignorewarnings'] = 'true'
        predata['token'] = image.get_token('edit')
        predata['action'] = 'upload'
        predata['format'] = 'json'
        predata['filename'] = filename
        if url:
            predata['url'] = url

        # Renamed from sessionkey to filekey
        # https://git.wikimedia.org/commit/mediawiki%2Fcore.git/5f13517e
        if self.version[:2] < (1, 18):
            predata['sessionkey'] = filekey
        else:
            predata['filekey'] = filekey

        postdata = predata
        files = None
        if file is not None:
            # Workaround for https://github.com/mwclient/mwclient/issues/65
            # Since the filename in Content-Disposition is not interpreted,
            # we can send some ascii-only dummy name rather than the real
            # filename, which might contain non-ascii.
            file = ('fake-filename', file)
            files = {'file': file}

        sleeper = self.sleepers.make()
        while True:
            data = self.raw_call('api', postdata, files)
            info = json.loads(data)
            if not info:
                info = {}
            if self.handle_api_result(info, kwargs=predata, sleeper=sleeper):
                return info.get('upload', {})

    def parse(self, text=None, title=None, page=None):
        """Parse wikitext (or a page) and return the 'parse' API result."""
        kwargs = {}
        if text is not None:
            kwargs['text'] = text
        if title is not None:
            kwargs['title'] = title
        if page is not None:
            kwargs['page'] = page
        result = self.api('parse', **kwargs)
        return result['parse']

    # def block(self): TODO?
    # def unblock: TODO?
    # def patrol: TODO?
    # def import: TODO?

    # Lists
    def allpages(self, start=None, prefix=None, namespace='0',
                 filterredir='all', minsize=None, maxsize=None, prtype=None,
                 prlevel=None, limit=None, dir='ascending',
                 filterlanglinks='all', generator=True):
        """Retrieve all pages on the wiki as a generator."""
        pfx = listing.List.get_prefix('ap', generator)
        kwargs = dict(listing.List.generate_kwargs(
            pfx, ('from', start), prefix=prefix, minsize=minsize,
            maxsize=maxsize, prtype=prtype, prlevel=prlevel,
            namespace=namespace, filterredir=filterredir, dir=dir,
            filterlanglinks=filterlanglinks))
        return listing.List.get_list(generator)(
            self, 'allpages', 'ap', limit=limit, return_values='title',
            **kwargs)

    def allimages(self, start=None, prefix=None, minsize=None, maxsize=None,
                  limit=None, dir='ascending', sha1=None, sha1base36=None,
                  prop='timestamp|url', generator=True):
        """Retrieve all images on the wiki as a generator."""
        pfx = listing.List.get_prefix('ai', generator)
        kwargs = dict(listing.List.generate_kwargs(
            pfx, ('from', start), prefix=prefix, minsize=minsize,
            maxsize=maxsize, dir=dir, sha1=sha1, sha1base36=sha1base36))
        return listing.List.get_list(generator)(
            self, 'allimages', 'ai', limit=limit,
            return_values='timestamp|url', **kwargs)

    def alllinks(self, start=None, prefix=None, unique=False, prop='title',
                 namespace='0', limit=None, generator=True):
        """Retrieve a list of all links on the wiki as a generator."""
        pfx = listing.List.get_prefix('al', generator)
        kwargs = dict(listing.List.generate_kwargs(
            pfx, ('from', start), prefix=prefix, prop=prop,
            namespace=namespace))
        if unique:
            kwargs[pfx + 'unique'] = '1'
        return listing.List.get_list(generator)(
            self, 'alllinks', 'al', limit=limit, return_values='title',
            **kwargs)

    def allcategories(self, start=None, prefix=None, dir='ascending',
                      limit=None, generator=True):
        """Retrieve all categories on the wiki as a generator."""
        pfx = listing.List.get_prefix('ac', generator)
        kwargs = dict(listing.List.generate_kwargs(
            pfx, ('from', start), prefix=prefix, dir=dir))
        return listing.List.get_list(generator)(
            self, 'allcategories', 'ac', limit=limit, **kwargs)

    def allusers(self, start=None, prefix=None, group=None, prop=None,
                 limit=None, witheditsonly=False, activeusers=False,
                 rights=None):
        """Retrieve all users on the wiki as a generator."""
        kwargs = dict(listing.List.generate_kwargs(
            'au', ('from', start), prefix=prefix, group=group, prop=prop,
            rights=rights, witheditsonly=witheditsonly,
            activeusers=activeusers))
        return listing.List(self, 'allusers', 'au', limit=limit, **kwargs)

    def blocks(self, start=None, end=None, dir='older', ids=None, users=None,
               limit=None, prop='id|user|by|timestamp|expiry|reason|flags'):
        """Retrieve blocks as a generator.

        Each block is a dictionary containing:

        - user: the username or IP address of the user
        - id: the ID of the block
        - timestamp: when the block was added
        - expiry: when the block runs out (infinity for indefinite blocks)
        - reason: the reason they are blocked
        - allowusertalk: key is present (empty string) if the user is allowed
          to edit their user talk page
        - by: the administrator who blocked the user
        - nocreate: key is present (empty string) if the user's ability to
          create accounts has been disabled.
        """
        # NOTE(review): the `ids` parameter is accepted but never forwarded to
        # the API — confirm whether 'bkids' support is intended here.
        kwargs = dict(listing.List.generate_kwargs(
            'bk', start=start, end=end, dir=dir, users=users, prop=prop))
        return listing.List(self, 'blocks', 'bk', limit=limit, **kwargs)

    def deletedrevisions(self, start=None, end=None, dir='older',
                         namespace=None, limit=None, prop='user|comment'):
        # TODO: Fix
        kwargs = dict(listing.List.generate_kwargs(
            'dr', start=start, end=end, dir=dir, namespace=namespace,
            prop=prop))
        return listing.List(self, 'deletedrevs', 'dr', limit=limit, **kwargs)

    def exturlusage(self, query, prop=None, protocol='http', namespace=None,
                    limit=None):
        """Retrieves list of pages that link to a particular domain or URL
        as a generator.

        This API call mirrors the Special:LinkSearch function on-wiki.

        Query can be a domain like 'bbc.co.uk'. Wildcards can be used, e.g.
        '*.bbc.co.uk'. Alternatively, a query can contain a full domain name
        and some or all of a URL: e.g. '*.wikipedia.org/wiki/*'

        See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details.

        The generator returns dictionaries containing three keys:

        - url: the URL linked to.
        - ns: namespace of the wiki page
        - pageid: the ID of the wiki page
        - title: the page title.
        """
        kwargs = dict(listing.List.generate_kwargs(
            'eu', query=query, prop=prop, protocol=protocol,
            namespace=namespace))
        return listing.List(self, 'exturlusage', 'eu', limit=limit, **kwargs)

    def logevents(self, type=None, prop=None, start=None, end=None,
                  dir='older', user=None, title=None, limit=None, action=None):
        """Retrieve log events as a generator."""
        kwargs = dict(listing.List.generate_kwargs(
            'le', prop=prop, type=type, start=start, end=end, dir=dir,
            user=user, title=title, action=action))
        return listing.List(self, 'logevents', 'le', limit=limit, **kwargs)

    def checkuserlog(self, user=None, target=None, limit=10, dir='older',
                     start=None, end=None):
        """Retrieve checkuserlog events as a generator."""
        kwargs = dict(listing.List.generate_kwargs(
            'cul', target=target, start=start, end=end, dir=dir, user=user))
        return listing.NestedList('entries', self, 'checkuserlog', 'cul',
                                  limit=limit, **kwargs)

    # def protectedtitles requires 1.15

    def random(self, namespace, limit=20):
        """Retrieves a generator of random page from a particular namespace.

        limit specifies the number of random articles retrieved.
        namespace is a namespace identifier integer.

        Generator contains dictionary with namespace, page ID and title.
        """
        kwargs = dict(listing.List.generate_kwargs('rn', namespace=namespace))
        return listing.List(self, 'random', 'rn', limit=limit, **kwargs)

    def recentchanges(self, start=None, end=None, dir='older', namespace=None,
                      prop=None, show=None, limit=None, type=None,
                      toponly=None):
        """Retrieve recent changes as a generator."""
        kwargs = dict(listing.List.generate_kwargs(
            'rc', start=start, end=end, dir=dir, namespace=namespace,
            prop=prop, show=show, type=type,
            toponly='1' if toponly else None))
        return listing.List(self, 'recentchanges', 'rc', limit=limit, **kwargs)

    def search(self, search, namespace='0', what=None, redirects=False,
               limit=None):
        """
        Perform a full text search.

        API doc: https://www.mediawiki.org/wiki/API:Search

        >>> for result in site.search('prefix:Template:Citation/'):
        ...     print(result.get('title'))

        Args:
            search (str): The query string
            namespace (int): The namespace to search (default: 0)
            what (str): Search scope: 'text' for fulltext, or 'title' for
                titles only. Depending on the search backend, both options
                may not be available. For instance
                `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_
                doesn't support 'title', but instead provides an "intitle:"
                query string filter.
            redirects (bool): Include redirect pages in the search (option
                removed in MediaWiki 1.23).

        Returns:
            mwclient.listings.List: Search results iterator
        """
        kwargs = dict(listing.List.generate_kwargs(
            'sr', search=search, namespace=namespace, what=what))
        if redirects:
            kwargs['srredirects'] = '1'
        return listing.List(self, 'search', 'sr', limit=limit, **kwargs)

    def usercontributions(self, user, start=None, end=None, dir='older',
                          namespace=None, prop=None, show=None, limit=None):
        """List a user's contributions, à la Special:Contributions."""
        kwargs = dict(listing.List.generate_kwargs(
            'uc', user=user, start=start, end=end, dir=dir,
            namespace=namespace, prop=prop, show=show))
        return listing.List(self, 'usercontribs', 'uc', limit=limit, **kwargs)

    def users(self, users, prop='blockinfo|groups|editcount'):
        """Get information about a list of users."""
        return listing.List(self, 'users', 'us', ususers='|'.join(users),
                            usprop=prop)

    def watchlist(self, allrev=False, start=None, end=None, namespace=None,
                  dir='older', prop=None, show=None, limit=None):
        """List the pages on the current user's watchlist."""
        kwargs = dict(listing.List.generate_kwargs(
            'wl', start=start, end=end, namespace=namespace, dir=dir,
            prop=prop, show=show))
        if allrev:
            kwargs['wlallrev'] = '1'
        return listing.List(self, 'watchlist', 'wl', limit=limit, **kwargs)

    def expandtemplates(self, text, title=None, generatexml=False):
        """Takes wikitext (text) and expands templates."""
        kwargs = {}
        # BUG FIX: this previously read `if title is None`, which never
        # forwarded a caller-supplied title (and would only ever store None).
        if title is not None:
            kwargs['title'] = title
        if generatexml:
            kwargs['generatexml'] = '1'

        result = self.api('expandtemplates', text=text, **kwargs)

        if generatexml:
            return result['expandtemplates']['*'], result['parsetree']['*']
        else:
            return result['expandtemplates']['*']

    def ask(self, query, title=None):
        """Ask a query against Semantic MediaWiki."""
        kwargs = {}
        # BUG FIX: this previously read `if title is None`, dropping any
        # caller-supplied title.
        if title is not None:
            kwargs['title'] = title
        result = self.raw_api('ask', query=query, **kwargs)
        return result['query']['results']

    def embeddedin(self, title, prop='title', namespace=None, limit=None):
        """Yield pages that transclude a given page.

        API doc: https://www.mediawiki.org/wiki/API:Embeddedin

        Args:
            title (str): list pages that transclude this title.
            namespace (int): restricts search to a given namespace
            prop (str): prop list (seperated by "|")
            limit (int): default amount of page to return for each query
        """
        kwargs = dict(listing.List.generate_kwargs(
            'ei', prop=prop, title=title, namespace=namespace, limit=limit))
        for info in listing.List(self, 'embeddedin', 'ei', **kwargs):
            yield page.Page(self, info['title'])