Example #1
0
class TestSleepers(unittest.TestCase):
    """Exercise the ``Sleepers`` factory and the sleeper objects it makes."""

    def setUp(self):
        # time.sleep is replaced by a mock so that no test really waits; the
        # mock also records every requested duration.
        self.max_retries = 10
        self.sleep = mock.patch('time.sleep').start()
        self.sleepers = Sleepers(self.max_retries, 30)

    def tearDown(self):
        # Undo every patch started in setUp.
        mock.patch.stopall()

    def test_make(self):
        """make() returns a Sleeper whose retry counter starts at zero."""
        made = self.sleepers.make()
        assert type(made) == Sleeper
        assert made.retries == 0

    def test_sleep(self):
        """Two sleeps in a row request 0 and then 30 seconds."""
        napper = self.sleepers.make()
        for _ in range(2):
            napper.sleep()
        expected_calls = [mock.call(0), mock.call(30)]
        self.sleep.assert_has_calls(expected_calls)

    def test_min_time(self):
        """A minimum time passed to sleep() is what time.sleep receives."""
        napper = self.sleepers.make()
        napper.sleep(5)
        expected_calls = [mock.call(5)]
        self.sleep.assert_has_calls(expected_calls)

    def test_retries_count(self):
        """Every sleep() call increments the retry counter by one."""
        napper = self.sleepers.make()
        for _ in range(2):
            napper.sleep()
        assert napper.retries == 2

    def test_max_retries(self):
        """The sleep after ``max_retries`` sleeps raises MaximumRetriesExceeded."""
        napper = self.sleepers.make()
        attempts = 0
        while attempts < self.max_retries:
            napper.sleep()
            attempts += 1
        with pytest.raises(MaximumRetriesExceeded):
            napper.sleep()
Example #2
0
class TestSleepers(unittest.TestCase):
    """Exercise the ``Sleepers`` factory and the sleeper objects it makes."""

    def setUp(self):
        # time.sleep is replaced by a mock so that no test really waits; the
        # mock also records every requested duration.
        self.max_retries = 10
        self.sleep = mock.patch('time.sleep').start()
        self.sleepers = Sleepers(self.max_retries, 30)

    def tearDown(self):
        # Undo every patch started in setUp.
        mock.patch.stopall()

    def test_make(self):
        """make() returns a Sleeper whose retry counter starts at zero."""
        fresh = self.sleepers.make()
        assert type(fresh) == Sleeper
        assert fresh.retries == 0

    def test_sleep(self):
        """Two sleeps in a row request 0 and then 30 seconds."""
        fresh = self.sleepers.make()
        for _ in range(2):
            fresh.sleep()
        wanted = [mock.call(0), mock.call(30)]
        self.sleep.assert_has_calls(wanted)

    def test_min_time(self):
        """A minimum time passed to sleep() is what time.sleep receives."""
        fresh = self.sleepers.make()
        fresh.sleep(5)
        wanted = [mock.call(5)]
        self.sleep.assert_has_calls(wanted)

    def test_retries_count(self):
        """Every sleep() call increments the retry counter by one."""
        fresh = self.sleepers.make()
        for _ in range(2):
            fresh.sleep()
        assert fresh.retries == 2

    def test_max_retries(self):
        """The sleep after ``max_retries`` sleeps raises MaximumRetriesExceeded."""
        fresh = self.sleepers.make()
        remaining = self.max_retries
        while remaining > 0:
            fresh.sleep()
            remaining -= 1
        with pytest.raises(MaximumRetriesExceeded):
            fresh.sleep()
Example #3
0
class Site(object):
    """A MediaWiki site identified by its hostname.

        >>> import mwclient
        >>> site = mwclient.Site('en.wikipedia.org')

    Do not include the leading "http://".

    Mwclient assumes that the script path (where index.php and api.php are located)
    is '/w/'. If the site uses a different script path, you must specify this
    (path must end in a '/').

    Examples:

        >>> site = mwclient.Site('vim.wikia.com', path='/')
        >>> site = mwclient.Site('sourceforge.net', path='/apps/mediawiki/mwclient/')

    """
    api_limit = 500

    def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30,
                 max_retries=25, wait_callback=lambda *x: None, clients_useragent=None,
                 max_lag=3, compress=True, force_login=True, do_init=True, httpauth=None,
                 reqs=None, consumer_token=None, consumer_secret=None, access_token=None,
                 access_secret=None, client_certificate=None, custom_headers=None):
        """Store the configuration, set up the connection and optionally
        query the site.

        Args:
            host: Host of the wiki; stored as ``self.host``.
            path: Script path, stored as ``self.path``.
            ext: Script extension, stored as ``self.ext``.
            pool: Ready-made connection object. When given it is used as is
                and none of the authentication, certificate, User-Agent or
                custom header settings below are applied to it.
            retry_timeout, max_retries, wait_callback: Handed to ``Sleepers``
                as (max_retries, retry_timeout, wait_callback).
            clients_useragent: Optional text placed in front of the default
                User-Agent string.
            max_lag: Stored in text form as ``self.max_lag``.
            compress: Stored as ``self.compress``.
            force_login: Stored as ``self.force_login``.
            do_init: When true, ``site_init()`` is called before returning.
            httpauth: ``None``, a list/tuple that is unpacked into
                ``HTTPBasicAuth``, or an ``AuthBase`` instance.
            reqs: Stored as ``self.requests`` (an empty dict when falsy).
            consumer_token, consumer_secret, access_token, access_secret:
                When ``consumer_token`` is not None these build an ``OAuth1``
                object, which takes precedence over ``httpauth``.
            client_certificate: Assigned to the new session's ``cert``.
            custom_headers: Mapping merged into the new session's headers.

        Raises:
            RuntimeError: ``httpauth`` is of an unsupported type.
            errors.OAuthAuthorizationError: ``site_init()`` failed with the
                code 'mwoauth-invalid-authorization'.
            errors.APIError: any other API error from ``site_init()`` whose
                code is neither 'unknown_action' nor 'readapidenied'.
        """
        # Plain configuration values
        self.host, self.path, self.ext = host, path, ext
        self.credentials = None
        self.compress = compress
        self.max_lag = text_type(max_lag)
        self.force_login = force_login
        self.requests = reqs if reqs else {}

        # Pick the authentication object; OAuth wins over httpauth
        if consumer_token is not None:
            session_auth = OAuth1(consumer_token, consumer_secret,
                                  access_token, access_secret)
        elif isinstance(httpauth, (list, tuple)):
            session_auth = HTTPBasicAuth(*httpauth)
        elif httpauth is None or isinstance(httpauth, AuthBase):
            session_auth = httpauth
        else:
            raise RuntimeError('Authentication is not a tuple or an instance of AuthBase')

        self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback)

        # State that describes the current user and the site
        self.blocked = False  # Whether current user is blocked
        self.hasmsg = False   # Whether current user has new messages
        self.groups = []      # Groups current user belongs to
        self.rights = []      # Rights current user has
        self.tokens = {}      # Edit tokens of the current user
        self.version = None

        self.namespaces = self.default_namespaces
        self.writeapi = False

        # Connection: reuse the caller's pool or build a fresh session
        if pool is not None:
            self.connection = pool
        else:
            session = requests.Session()
            self.connection = session
            session.auth = session_auth
            if client_certificate:
                session.cert = client_certificate

            if clients_useragent:
                ua_prefix = '{} - '.format(clients_useragent)
            else:
                ua_prefix = ''
            session.headers['User-Agent'] = '{prefix}MwClient/{ver} ({url})'.format(
                prefix=ua_prefix,
                ver=__ver__,
                url='https://github.com/mwclient/mwclient'
            )
            if custom_headers:
                session.headers.update(custom_headers)

        # Page generators, each also reachable under its capitalised
        # compatibility alias
        self.pages = self.Pages = listing.PageList(self)
        self.categories = self.Categories = listing.PageList(self, namespace=14)
        self.images = self.Images = listing.PageList(self, namespace=6)

        # Initialization status
        self.initialized = False

        if not do_init:
            return

        try:
            self.site_init()
        except errors.APIError as e:
            code = e.args[0]
            if code == 'mwoauth-invalid-authorization':
                raise errors.OAuthAuthorizationError(e.code, e.info)

            # Private wiki: initialisation is done after login instead
            if code not in {u'unknown_action', u'readapidenied'}:
                raise

    def site_init(self):
        """Load site and user information from the API.

        On the first successful run the general site info, the namespaces,
        the MediaWiki version and the current user's info are stored and
        ``self.initialized`` is set. Once initialized, later calls only
        refresh the user info and clear the cached tokens.

        Raises:
            errors.MediaWikiVersionError: the generator string is not
                understood, or the wiki is older than MediaWiki 1.16.
        """
        def remember_user(userinfo):
            # Copy the current user's name, groups and rights onto the site.
            self.username = userinfo['name']
            self.groups = userinfo.get('groups', [])
            self.rights = userinfo.get('rights', [])

        if self.initialized:
            refreshed = self.get('query', meta='userinfo', uiprop='groups|rights')
            remember_user(refreshed['query']['userinfo'])
            self.tokens = {}
            return

        meta = self.get('query', meta='siteinfo|userinfo',
                        siprop='general|namespaces', uiprop='groups|rights',
                        retry_on_error=False)
        query = meta['query']

        # Extract site info
        self.site = query['general']
        self.namespaces = dict(
            (ns['id'], ns.get('*', ''))
            for ns in six.itervalues(query['namespaces'])
        )
        self.writeapi = 'writeapi' in self.site

        self.version = self.version_tuple_from_generator(self.site['generator'])

        # Require MediaWiki version >= 1.16
        self.require(1, 16)

        # User info
        remember_user(query['userinfo'])
        self.initialized = True

    @staticmethod
    def version_tuple_from_generator(string, prefix='MediaWiki '):
        """Return a version tuple from a MediaWiki Generator string.

        Example:
            "MediaWiki 1.5.1" → (1, 5, 1)

        Args:
            prefix (str): The expected prefix of the string
        """
        if not string.startswith(prefix):
            raise errors.MediaWikiVersionError('Unknown generator {}'.format(string))

        version = string[len(prefix):].split('.')

        def split_num(s):
            """Split the string on the first non-digit character.

            Returns:
                A tuple of the digit part as int and, if available,
                the rest of the string.
            """
            i = 0
            while i < len(s):
                if s[i] < '0' or s[i] > '9':
                    break
                i += 1
            if s[i:]:
                return (int(s[:i]), s[i:], )
            else:
                return (int(s[:i]), )

        version_tuple = sum((split_num(s) for s in version), ())

        if len(version_tuple) < 2:
            raise errors.MediaWikiVersionError('Unknown MediaWiki {}'
                                               .format('.'.join(version)))

        return version_tuple

    # Built-in mapping of namespace id to namespace name. __init__ assigns it
    # to self.namespaces; site_init() later replaces that attribute with the
    # mapping reported by the wiki.
    default_namespaces = {
        0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project',
        5: u'Project talk', 6: u'Image', 7: u'Image talk', 8: u'MediaWiki',
        9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk', 12: u'Help',
        13: u'Help talk', 14: u'Category', 15: u'Category talk',
        -1: u'Special', -2: u'Media'
    }

    def __repr__(self):
        """Return a short description made of the host and the script path."""
        location = (self.host, self.path)
        return "<Site object '%s%s'>" % location

    def get(self, action, *args, **kwargs):
        """Make an API call over HTTP GET.

        Thin wrapper around api() that fixes http_method to 'GET'; every
        other argument is forwarded unchanged.

        Returns:
            The raw response from the API call, as a dictionary.
        """
        http_method = 'GET'
        return self.api(action, http_method, *args, **kwargs)

    def post(self, action, *args, **kwargs):
        """Make an API call over HTTP POST.

        Thin wrapper around api() that fixes http_method to 'POST'; every
        other argument is forwarded unchanged.

        Returns:
            The raw response from the API call, as a dictionary.
        """
        http_method = 'POST'
        return self.api(action, http_method, *args, **kwargs)

    def api(self, action, http_method='POST', *args, **kwargs):
        """Perform a generic API call and handle errors.

        Positional `args` are (name, value) pairs that are merged into the
        keyword arguments. An empty 'continue' parameter is added when none
        is given, and 'query' actions always additionally request the
        user's block and message status. The call is repeated for as long
        as handle_api_result() reports that it should be retried.

        Example:
            To get coordinates from the GeoData MediaWiki extension at English Wikipedia:

            >>> site = Site('en.wikipedia.org')
            >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen')
            >>> for page in result['query']['pages'].values():
            ...     if 'coordinates' in page:
            ...         print('{} {} {}'.format(page['title'],
            ...             page['coordinates'][0]['lat'],
            ...             page['coordinates'][0]['lon']))
            Oslo 59.95 10.75
            Copenhagen 55.6761 12.5683

        Returns:
            The raw response from the API call, as a dictionary.
        """
        kwargs.update(args)
        kwargs.setdefault('continue', '')

        if action == 'query':
            # Piggy-back the user status request on every query
            for name, extra in (('meta', 'userinfo'),
                                ('uiprop', 'blockinfo|hasmsg')):
                if name in kwargs:
                    kwargs[name] += '|' + extra
                else:
                    kwargs[name] = extra

        sleeper = self.sleepers.make()

        while True:
            info = self.raw_api(action, http_method, **kwargs) or {}
            if self.handle_api_result(info, sleeper=sleeper):
                return info

    def handle_api_result(self, info, kwargs=None, sleeper=None):
        """Update the user status from an API response and check it for errors.

        Args:
            info (dict): Decoded API response.
            kwargs: Attached to the raised APIError when the error carries
                no '*' entry.
            sleeper: Sleeper used to wait before a retry; a new one is made
                when omitted.

        Returns:
            True when the response holds no error, False when the call
            should be repeated (after sleeping) because of a database error.

        Raises:
            errors.APIError: for every other error reported by the API.
        """
        if sleeper is None:
            sleeper = self.sleepers.make()

        try:
            userinfo = info['query']['userinfo']
        except KeyError:
            userinfo = ()

        if 'blockedby' in userinfo:
            self.blocked = (userinfo['blockedby'], userinfo.get('blockreason', u''))
        else:
            self.blocked = False
        self.hasmsg = 'messages' in userinfo
        self.logged_in = 'anon' not in userinfo

        if 'error' not in info:
            return True

        error = info['error']
        code = error['code']
        retryable = {u'internal_api_error_DBConnectionError',
                     u'internal_api_error_DBQueryError'}
        if code in retryable:
            sleeper.sleep()
            return False

        # Prefer the details sent by the API over the request arguments
        details = error['*'] if '*' in error else kwargs
        raise errors.APIError(code, error['info'], details)

    @staticmethod
    def _query_string(*args, **kwargs):
        """Build the ordered request parameters.

        Positional (name, value) pairs are merged into the keyword
        arguments. The 'wpEditToken' and 'token' entries are moved to the
        end of the returned OrderedDict.
        """
        kwargs.update(args)
        token_names = {'wpEditToken', 'token'}
        regular, trailing = [], []
        for name, value in six.iteritems(kwargs):
            bucket = trailing if name in token_names else regular
            bucket.append((name, value))
        return OrderedDict(regular + trailing)

    def raw_call(self, script, data, files=None, retry_on_error=True, http_method='POST'):
        """
        Perform a generic request and return the raw text.

        In the event of a network problem, or a HTTP response with status code 5XX,
        we'll wait and retry the configured number of times before giving up
        if `retry_on_error` is True.

        `requests.exceptions.HTTPError` is still raised directly for
        HTTP responses with status codes in the 4XX range, and invalid
        HTTP responses.

        A response that carries an `x-database-lag` header is retried after
        sleeping for the number of seconds given by its `retry-after` header,
        independently of `retry_on_error`.

        Args:
            script (str): Script name, usually 'api'.
            data (dict): Post data
            files (dict): Files to upload
            retry_on_error (bool): Retry on connection error
            http_method (str): 'GET' sends `data` as URL parameters; any
                other value sends `data` with a POST request.

        Returns:
            The raw text response.
        """
        headers = {}
        if self.compress and gzip:
            headers['Accept-Encoding'] = 'gzip'
        # One sleeper for the whole call, so all retries below share a counter
        sleeper = self.sleepers.make((script, data))

        # self.host is either a plain host name (https is then used) or a
        # (scheme, host) pair
        scheme = 'https'
        host = self.host
        if isinstance(host, (list, tuple)):
            scheme, host = host

        url = '{scheme}://{host}{path}{script}{ext}'.format(scheme=scheme, host=host,
                                                            path=self.path, script=script,
                                                            ext=self.ext)

        while True:
            try:
                if http_method == 'GET':
                    stream = self.connection.get(url, params=data, files=files,
                                                 headers=headers, **self.requests)
                else:
                    stream = self.connection.post(url, data=data, files=files,
                                                  headers=headers, **self.requests)
                if stream.headers.get('x-database-lag'):
                    # NOTE(review): int(None) raises TypeError when the
                    # response has no 'retry-after' header - confirm that the
                    # server always sends both headers together.
                    wait_time = int(stream.headers.get('retry-after'))
                    log.warning('Database lag exceeds max lag. '
                                'Waiting for {} seconds'.format(wait_time))
                    sleeper.sleep(wait_time)
                elif stream.status_code == 200:
                    return stream.text
                elif stream.status_code < 500 or stream.status_code > 599:
                    # NOTE(review): when raise_for_status() does not raise for
                    # this status, the loop sends the request again at once,
                    # without sleeping - confirm this is intended.
                    stream.raise_for_status()
                else:
                    # 5XX response: fail right away or wait and try again
                    if not retry_on_error:
                        stream.raise_for_status()
                    log.warning('Received {status} response: {text}. '
                                'Retrying in a moment.'
                                .format(status=stream.status_code,
                                        text=stream.text))
                    sleeper.sleep()

            except requests.exceptions.ConnectionError:
                # In the event of a network problem
                # (e.g. DNS failure, refused connection, etc),
                # Requests will raise a ConnectionError exception.
                if not retry_on_error:
                    raise
                log.warning('Connection error. Retrying in a moment.')
                sleeper.sleep()

    def raw_api(self, action, http_method='POST', *args, **kwargs):
        """Send a call to api.php and decode the JSON answer.

        The optional keyword `retry_on_error` (default True) is removed from
        the parameters and handed to raw_call(); everything else is sent to
        the API together with the action and format='json'.

        Raises:
            errors.APIDisabledError: the answer is not JSON and starts with
                the "API is not enabled" message.
            errors.InvalidResponse: the answer is not JSON for another reason.
        """
        retry_on_error = kwargs.pop('retry_on_error', True)
        kwargs.update(action=action, format='json')
        payload = self._query_string(*args, **kwargs)
        body = self.raw_call('api', payload, retry_on_error=retry_on_error,
                             http_method=http_method)

        try:
            return json.loads(body)
        except ValueError:
            if body.startswith('MediaWiki API is not enabled for this site.'):
                raise errors.APIDisabledError
            raise errors.InvalidResponse(body)

    def raw_index(self, action, http_method='POST', *args, **kwargs):
        """Send a call to index.php rather than the API.

        The action and the configured maxlag value are added to the request
        parameters. Returns the raw text produced by raw_call().
        """
        kwargs.update(action=action, maxlag=self.max_lag)
        payload = self._query_string(*args, **kwargs)
        return self.raw_call('index', payload, http_method=http_method)

    def require(self, major, minor, revision=None, raise_error=True):
        """Check that the site runs at least MediaWiki `major`.`minor`.

        Args:
            major (int): Required major version.
            minor (int): Required minor version.
            revision: Must be None; revision checks are not implemented.
            raise_error: When true, a too old version raises instead of
                returning False. Passing None additionally makes the check
                return None silently on a site whose version is unknown.

        Returns:
            True or False, or None when the version is unknown and
            `raise_error` is None.

        Raises:
            RuntimeError: the version is unknown and `raise_error` is not None.
            NotImplementedError: `revision` is given.
            errors.MediaWikiVersionError: the version is too old and
                `raise_error` is true.
        """
        if self.version is None:
            if raise_error is not None:
                raise RuntimeError('Site %s has not yet been initialized' % repr(self))
            return

        if revision is not None:
            raise NotImplementedError

        required = (major, minor)
        current = self.version[:2]
        if current >= required:
            return True
        if not raise_error:
            return False
        raise errors.MediaWikiVersionError(
            'Requires version {required[0]}.{required[1]}, '
            'current version is {current[0]}.{current[1]}'
            .format(required=required, current=current)
        )

    # Actions
    def email(self, user, text, subject, cc=False):
        """
        Send email to a specified user on the wiki.

            >>> try:
            ...     site.email('SomeUser', 'Some message', 'Some subject')
            ... except mwclient.errors.NoSpecifiedEmail as e:
            ...     print('The user does not accept email, or has not specified an email address.')

        Args:
            user (str): User name of the recipient
            text (str): Body of the email
            subject (str): Subject of the email
            cc (bool): True to send a copy of the email to yourself (default is False)

        Returns:
            Dictionary of the JSON response

        Raises:
            NoSpecifiedEmail (mwclient.errors.NoSpecifiedEmail): if the API
                answers with the error code 'noemail'
            EmailError (mwclient.errors.EmailError): on other API errors; it
                carries the same arguments as the original APIError
        """

        token = self.get_token('email')

        try:
            info = self.post('emailuser', target=user, subject=subject,
                             text=text, ccme=cc, token=token)
        except errors.APIError as e:
            if e.args[0] == u'noemail':
                raise errors.NoSpecifiedEmail(user, e.args[1])
            # Unpack e.args, not e: exception objects are not iterable on
            # Python 3, so `*e` would raise TypeError instead of EmailError.
            raise errors.EmailError(*e.args)

        return info

    def login(self, username=None, password=None, cookies=None, domain=None):
        """Login to the wiki.

        When both `username` and `password` are given they are remembered,
        with `domain`, as the credentials. `cookies`, when given, are added
        to the connection's cookies. If credentials are known, the 'login'
        action is posted until it succeeds, supplying the login token when
        asked for one and sleeping when throttled. Finally site_init() is
        called.

        Raises:
            errors.LoginError: the API answered with any other login result.
        """
        if username and password:
            self.credentials = (username, password, domain)
        if cookies:
            self.connection.cookies.update(cookies)

        if self.credentials:
            sleeper = self.sleepers.make()
            params = {
                'lgname': self.credentials[0],
                'lgpassword': self.credentials[1]
            }
            if self.credentials[2]:
                params['lgdomain'] = self.credentials[2]

            outcome = None
            while outcome != 'Success':
                reply = self.post('login', **params)
                outcome = reply['login']['result']
                if outcome == 'NeedToken':
                    params['lgtoken'] = reply['login']['token']
                elif outcome == 'Throttled':
                    sleeper.sleep(int(reply['login'].get('wait', 5)))
                elif outcome != 'Success':
                    raise errors.LoginError(self, reply['login'])

        self.site_init()

    def get_token(self, type, force=False, title=None):
        """Return a token of the given type, fetching it when necessary.

        Tokens are cached in ``self.tokens``; a cached value of '0' counts
        as missing. On MediaWiki 1.24 and later every type except 'watch',
        'patrol', 'rollback' and 'userrights' is mapped to 'csrf'.

        Args:
            type (str): Token type.
            force (bool): Fetch a new token even when one is cached.
            title (str): Page title used for the request on versions
                before 1.24 ('Test' when omitted).
        """
        has_csrf = self.version[:2] >= (1, 24)

        # The 'csrf' (cross-site request forgery) token introduced in 1.24
        # replaces the majority of older tokens, like edittoken and movetoken.
        if has_csrf and type not in {'watch', 'patrol', 'rollback', 'userrights'}:
            type = 'csrf'

        if type not in self.tokens:
            self.tokens[type] = '0'

        if force or self.tokens[type] == '0':
            token_key = '%stoken' % type
            if has_csrf:
                info = self.post('query', meta='tokens', type=type)
                self.tokens[type] = info['query']['tokens'][token_key]
            else:
                if title is None:
                    # Some dummy title was needed to get a token prior to 1.24
                    title = 'Test'
                info = self.post('query', titles=title,
                                 prop='info', intoken=type)
                for page in six.itervalues(info['query']['pages']):
                    if page['title'] == title:
                        self.tokens[type] = page[token_key]

        return self.tokens[type]

    def upload(self, file=None, filename=None, description='', ignore=False,
               file_size=None, url=None, filekey=None, comment=None):
        """Upload a file to the site.

        Note that one of `file`, `filekey` and `url` must be specified, but not
        more than one. For normal uploads, you specify `file`.

        Args:
            file: File object or stream to upload.
            filename (str): Destination filename, don't include namespace
                            prefix like 'File:'
            description (str): Wikitext for the file description page.
            ignore (bool): True to upload despite any warnings.
            file_size (int): Deprecated in mwclient 0.7; the value is not
                             used, passing it only triggers a warning.
            url (str): URL to fetch the file from.
            filekey (str): Key that identifies a previous upload that was
                           stashed temporarily.
            comment (str): Upload comment. Also used as the initial page text
                           for new files if `description` is not specified.

        Example:

            >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg',
                              description='Some description')

        Returns:
            The 'upload' part of the JSON result from the API (an empty dict
            when the result has no such part).

        Raises:
            TypeError: `filename` is missing, or not exactly one of `file`,
                `filekey` and `url` is given.
            errors.InsufficientPermission
            requests.exceptions.HTTPError
        """

        if file_size is not None:
            # Note that DeprecationWarning is hidden by default since Python 2.7
            warnings.warn(
                'file_size is deprecated since mwclient 0.7',
                DeprecationWarning
            )

        if filename is None:
            raise TypeError('filename must be specified')

        if len([x for x in [file, filekey, url] if x is not None]) != 1:
            raise TypeError("exactly one of 'file', 'filekey' and 'url' must be specified")

        image = self.Images[filename]
        if not image.can('upload'):
            raise errors.InsufficientPermission(filename)

        # Form fields of the upload request
        predata = {}

        # Without a separate comment the description doubles as the comment
        if comment is None:
            predata['comment'] = description
        else:
            predata['comment'] = comment
            predata['text'] = description

        if ignore:
            predata['ignorewarnings'] = 'true'
        predata['token'] = image.get_token('edit')
        predata['action'] = 'upload'
        predata['format'] = 'json'
        predata['filename'] = filename
        if url:
            predata['url'] = url

        # sessionkey was renamed to filekey in MediaWiki 1.18
        # https://phabricator.wikimedia.org/rMW5f13517e36b45342f228f3de4298bb0fe186995d
        # NOTE(review): the key is added even when filekey is None.
        if self.version[:2] < (1, 18):
            predata['sessionkey'] = filekey
        else:
            predata['filekey'] = filekey

        postdata = predata
        files = None
        if file is not None:

            # Workaround for https://github.com/mwclient/mwclient/issues/65
            # ----------------------------------------------------------------
            # Since the filename in Content-Disposition is not interpreted,
            # we can send some ascii-only dummy name rather than the real
            # filename, which might contain non-ascii.
            file = ('fake-filename', file)
            # End of workaround
            # ----------------------------------------------------------------

            files = {'file': file}

        # Repeat the request for as long as handle_api_result() asks for a
        # retry; the shared sleeper counts the attempts.
        sleeper = self.sleepers.make()
        while True:
            data = self.raw_call('api', postdata, files)
            info = json.loads(data)
            if not info:
                info = {}
            if self.handle_api_result(info, kwargs=predata, sleeper=sleeper):
                return info.get('upload', {})

    def parse(self, text=None, title=None, page=None, prop=None,
              redirects=False, mobileformat=False):
        """Call the API's 'parse' action and return the 'parse' part of the result.

        `text`, `title`, `page` and `prop` are sent only when they are not
        None; `redirects` and `mobileformat` are sent as '1' when true.
        """
        kwargs = {}
        for name, value in (('text', text), ('title', title),
                            ('page', page), ('prop', prop)):
            if value is not None:
                kwargs[name] = value
        for name, enabled in (('redirects', redirects),
                              ('mobileformat', mobileformat)):
            if enabled:
                kwargs[name] = '1'
        return self.get('parse', **kwargs)['parse']

    # def block(self): TODO?
    # def unblock: TODO?
    # def patrol: TODO?
    # def import: TODO?

    # Lists
    def allpages(self, start=None, prefix=None, namespace='0', filterredir='all',
                 minsize=None, maxsize=None, prtype=None, prlevel=None,
                 limit=None, dir='ascending', filterlanglinks='all', generator=True,
                 end=None):
        """Retrieve all pages on the wiki as a generator.

        `start` and `end` are passed on under the names 'from' and 'to'.
        `generator` selects, via ``listing.List.get_prefix`` and
        ``listing.List.get_list``, the parameter prefix and the list class
        used; the other arguments are forwarded as list parameters.
        """
        param_prefix = listing.List.get_prefix('ap', generator)
        params = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), ('to', end), prefix=prefix,
            minsize=minsize, maxsize=maxsize, prtype=prtype, prlevel=prlevel,
            namespace=namespace, filterredir=filterredir, dir=dir,
            filterlanglinks=filterlanglinks,
        ))
        list_class = listing.List.get_list(generator)
        return list_class(self, 'allpages', 'ap', limit=limit,
                          return_values='title', **params)

    def allimages(self, start=None, prefix=None, minsize=None, maxsize=None, limit=None,
                  dir='ascending', sha1=None, sha1base36=None, generator=True, end=None):
        """Retrieve all images on the wiki as a generator.

        `start` and `end` are passed on under the names 'from' and 'to'.
        `generator` selects, via ``listing.List.get_prefix`` and
        ``listing.List.get_list``, the parameter prefix and the list class
        used; the other arguments are forwarded as list parameters.
        """
        param_prefix = listing.List.get_prefix('ai', generator)
        params = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), ('to', end), prefix=prefix,
            minsize=minsize, maxsize=maxsize,
            dir=dir, sha1=sha1, sha1base36=sha1base36,
        ))
        list_class = listing.List.get_list(generator)
        return list_class(self, 'allimages', 'ai', limit=limit,
                          return_values='timestamp|url', **params)

    def alllinks(self, start=None, prefix=None, unique=False, prop='title',
                 namespace='0', limit=None, generator=True, end=None):
        """Retrieve a list of all links on the wiki as a generator.

        `start` and `end` are passed on under the names 'from' and 'to'.
        When `unique` is true, the prefixed 'unique' parameter is set to '1'.
        `generator` selects the parameter prefix and the list class used.
        """
        param_prefix = listing.List.get_prefix('al', generator)
        params = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), ('to', end),
            prefix=prefix, prop=prop, namespace=namespace))
        if unique:
            params[param_prefix + 'unique'] = '1'
        list_class = listing.List.get_list(generator)
        return list_class(self, 'alllinks', 'al', limit=limit,
                          return_values='title', **params)

    def allcategories(self, start=None, prefix=None, dir='ascending', limit=None,
                      generator=True, end=None):
        """Retrieve all categories on the wiki as a generator.

        `start` and `end` are passed on under the names 'from' and 'to'.
        `generator` selects the parameter prefix and the list class used.
        """
        param_prefix = listing.List.get_prefix('ac', generator)
        params = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), ('to', end),
            prefix=prefix, dir=dir))
        list_class = listing.List.get_list(generator)
        return list_class(self, 'allcategories', 'ac', limit=limit, **params)

    def allusers(self, start=None, prefix=None, group=None, prop=None, limit=None,
                 witheditsonly=False, activeusers=False, rights=None, end=None):
        """Retrieve all users on the wiki as a generator.

        `start` and `end` are passed on under the names 'from' and 'to';
        the other arguments are forwarded as 'au' list parameters.
        """
        raw_params = listing.List.generate_kwargs(
            'au', ('from', start), ('to', end),
            prefix=prefix,
            group=group, prop=prop,
            rights=rights,
            witheditsonly=witheditsonly,
            activeusers=activeusers)
        params = dict(raw_params)
        return listing.List(self, 'allusers', 'au', limit=limit, **params)

    def blocks(self, start=None, end=None, dir='older', ids=None, users=None, limit=None,
               prop='id|user|by|timestamp|expiry|reason|flags'):
        """Retrieve blocks as a generator.

        Each block is a dictionary containing:

        - user: the username or IP address of the user
        - id: the ID of the block
        - timestamp: when the block was added
        - expiry: when the block runs out (infinity for indefinite blocks)
        - reason: the reason they are blocked
        - allowusertalk: key is present (empty string) if the user is allowed to edit their user talk page
        - by: the administrator who blocked the user
        - nocreate: key is present (empty string) if the user's ability to create accounts has been disabled.

        """

        # TODO: Fix. Fix what?
        raw_params = listing.List.generate_kwargs(
            'bk', start=start, end=end, dir=dir,
            ids=ids, users=users, prop=prop)
        params = dict(raw_params)
        return listing.List(self, 'blocks', 'bk', limit=limit, **params)

    def deletedrevisions(self, start=None, end=None, dir='older', namespace=None,
                         limit=None, prop='user|comment'):
        """Return a listing for the API's 'deletedrevs' list.

        The arguments are forwarded as 'dr' list parameters.
        """
        # TODO: Fix

        raw_params = listing.List.generate_kwargs(
            'dr', start=start, end=end, dir=dir,
            namespace=namespace, prop=prop)
        params = dict(raw_params)
        return listing.List(self, 'deletedrevs', 'dr', limit=limit, **params)

    def exturlusage(self, query, prop=None, protocol='http', namespace=None, limit=None):
        r"""Retrieve the list of pages that link to a particular domain or URL, as a generator.

        This API call mirrors the Special:LinkSearch function on-wiki.

        Query can be a domain like 'bbc.co.uk'.
        Wildcards can be used, e.g. '\*.bbc.co.uk'.
        Alternatively, a query can contain a full domain name and some or all of a URL:
        e.g. '\*.wikipedia.org/wiki/\*'

        See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details.

        The generator returns dictionaries containing these keys:
        - url: the URL linked to.
        - ns: namespace of the wiki page
        - pageid: the ID of the wiki page
        - title: the page title.

        """

        raw_params = listing.List.generate_kwargs(
            'eu', query=query, prop=prop,
            protocol=protocol, namespace=namespace)
        params = dict(raw_params)
        return listing.List(self, 'exturlusage', 'eu', limit=limit, **params)

    def logevents(self, type=None, prop=None, start=None, end=None,
                  dir='older', user=None, title=None, limit=None, action=None):
        """Retrieve logevents as a generator.

        The arguments are forwarded as 'le' list parameters.
        """
        raw_params = listing.List.generate_kwargs(
            'le', prop=prop, type=type, start=start,
            end=end, dir=dir, user=user,
            title=title, action=action)
        params = dict(raw_params)
        return listing.List(self, 'logevents', 'le', limit=limit, **params)

    def checkuserlog(self, user=None, target=None, limit=10, dir='older',
                     start=None, end=None):
        """Build a nested listing over the 'checkuserlog' API list.

        The items are read from the 'entries' member of each result
        (see `listing.NestedList`).
        """
        filters = dict(target=target, start=start, end=end, dir=dir, user=user)
        params = dict(listing.List.generate_kwargs('cul', **filters))
        return listing.NestedList('entries', self, 'checkuserlog', 'cul',
                                  limit=limit, **params)

    # def protectedtitles requires 1.15
    def random(self, namespace, limit=20):
        """Build a listing of random pages from one namespace.

        Args:
            namespace: Namespace identifier (an integer) to draw pages from.
            limit: Number of random articles to retrieve.

        Each item is a dictionary with namespace, page ID and title.
        """
        params = dict(listing.List.generate_kwargs('rn', namespace=namespace))
        return listing.List(self, 'random', 'rn', limit=limit, **params)

    def recentchanges(self, start=None, end=None, dir='older', namespace=None,
                      prop=None, show=None, limit=None, type=None, toponly=None):
        """List recent changes to the wiki, like Special:Recentchanges.

        A truthy `toponly` is sent as '1'; a falsy one is passed on as None.
        """
        top_flag = '1' if toponly else None
        filters = dict(start=start, end=end, dir=dir, namespace=namespace,
                       prop=prop, show=show, type=type, toponly=top_flag)
        params = dict(listing.List.generate_kwargs('rc', **filters))
        return listing.List(self, 'recentchanges', 'rc', limit=limit, **params)

    def revisions(self, revids, prop='ids|timestamp|flags|comment|user',
                  expandtemplates=False, diffto='prev'):
        """Get data about a list of revisions.

        See also the `Page.revisions()` method.

        API doc: https://www.mediawiki.org/wiki/API:Revisions

        Example: Get revision text for two revisions:

            >>> for revision in site.revisions([689697696, 689816909], prop='content'):
            ...     print(revision['*'])

        Args:
            revids (list): A list of (max 50) revisions.
            prop (str): Which properties to get for each revision.
            expandtemplates (bool): Expand templates in `rvprop=content` output.
            diffto (str): Revision ID to diff each revision to. Use "prev",
                          "next" and "cur" for the previous, next and current
                          revision respectively.

        Returns:
            A list of revision dictionaries. Each one additionally carries
            'pageid' and 'pagetitle' of the page it belongs to. If the
            revision has a 'timestamp' it is replaced by the result of
            `parse_timestamp`; revisions fetched without the timestamp
            property are returned without that key.
        """
        kwargs = {
            'prop': 'revisions',
            'rvprop': prop,
            'revids': '|'.join(map(text_type, revids))
        }
        if expandtemplates:
            kwargs['rvexpandtemplates'] = '1'
        if diffto:
            kwargs['rvdiffto'] = diffto

        revisions = []
        pages = self.get('query', **kwargs).get('query', {}).get('pages', {}).values()
        for page in pages:
            for revision in page.get('revisions', ()):
                revision['pageid'] = page.get('pageid')
                revision['pagetitle'] = page.get('title')
                # The timestamp is only present when `prop` asked for it
                # (e.g. not with prop='content'), so do not index blindly.
                if 'timestamp' in revision:
                    revision['timestamp'] = parse_timestamp(revision['timestamp'])
                revisions.append(revision)
        return revisions

    def search(self, search, namespace='0', what=None, redirects=False, limit=None):
        """Run a full text search on the wiki.

        API doc: https://www.mediawiki.org/wiki/API:Search

        Example:
            >>> for result in site.search('prefix:Template:Citation/'):
            ...     print(result.get('title'))

        Args:
            search (str): The query string
            namespace: The namespace to search (default: '0')
            what (str): Search scope: 'text' for fulltext, or 'title' for
                        titles only. Which of the two is available depends
                        on the search backend. For instance
                        `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_
                        doesn't support 'title', but instead provides an
                        "intitle:" query string filter.
            redirects (bool): Include redirect pages in the search
                              (option removed in MediaWiki 1.23).
            limit: Passed on to the listing.

        Returns:
            mwclient.listings.List: Search results iterator
        """
        filters = dict(search=search, namespace=namespace, what=what)
        params = dict(listing.List.generate_kwargs('sr', **filters))
        if redirects:
            params['srredirects'] = '1'
        return listing.List(self, 'search', 'sr', limit=limit, **params)

    def usercontributions(self, user, start=None, end=None, dir='older', namespace=None,
                          prop=None, show=None, limit=None):
        """
        List the contributions a user made to the wiki, like Special:Contributions.

        API doc: https://www.mediawiki.org/wiki/API:Usercontribs
        """
        filters = dict(user=user, start=start, end=end, dir=dir,
                       namespace=namespace, prop=prop, show=show)
        params = dict(listing.List.generate_kwargs('uc', **filters))
        return listing.List(self, 'usercontribs', 'uc', limit=limit, **params)

    def users(self, users, prop='blockinfo|groups|editcount'):
        """
        Look up information about several users at once.

        `users` is an iterable of user names; they are joined with '|' into
        a single `ususers` parameter.

        API doc: https://www.mediawiki.org/wiki/API:Users
        """
        joined_names = '|'.join(users)
        return listing.List(self, 'users', 'us', ususers=joined_names, usprop=prop)

    def watchlist(self, allrev=False, start=None, end=None, namespace=None, dir='older',
                  prop=None, show=None, limit=None):
        """
        List the pages on the watchlist of the current user.

        A truthy `allrev` adds `wlallrev=1` to the request.

        API doc: https://www.mediawiki.org/wiki/API:Watchlist
        """
        filters = dict(start=start, end=end, namespace=namespace, dir=dir,
                       prop=prop, show=show)
        params = dict(listing.List.generate_kwargs('wl', **filters))
        if allrev:
            params['wlallrev'] = '1'
        return listing.List(self, 'watchlist', 'wl', limit=limit, **params)

    def expandtemplates(self, text, title=None, generatexml=False):
        """
        Takes wikitext (text) and expands templates.

        API doc: https://www.mediawiki.org/wiki/API:Expandtemplates

        Args:
            text (str): Wikitext to expand.
            title (str): Page title to send along with the text. Omitted
                from the request when None.
            generatexml (bool): Also request the XML parse tree.

        Returns:
            The expanded wikitext, or a tuple (expanded wikitext, parse tree)
            when `generatexml` is true.
        """

        kwargs = {}
        # Only send a title the caller actually supplied.
        if title is not None:
            kwargs['title'] = title
        if generatexml:
            kwargs['generatexml'] = '1'

        result = self.get('expandtemplates', text=text, **kwargs)

        if generatexml:
            return result['expandtemplates']['*'], result['parsetree']['*']
        else:
            return result['expandtemplates']['*']

    def ask(self, query, title=None):
        """
        Ask a query against Semantic MediaWiki.

        API doc: https://semantic-mediawiki.org/wiki/Ask_API

        Args:
            query (str): The ask query. An '|offset=N' suffix is appended
                for each request.
            title (str): Sent as the `title` parameter when not None.

        Returns:
            Generator for retrieving all search results. Requests are
            repeated with the offset from 'query-continue-offset' until the
            response no longer contains one.
        """
        kwargs = {}
        # Only send a title the caller actually supplied.
        if title is not None:
            kwargs['title'] = title

        offset = 0
        while offset is not None:
            paged_query = '{query}|offset={offset}'.format(query=query, offset=offset)
            # http_method belongs to raw_api(), not to str.format().
            results = self.raw_api('ask', query=paged_query, http_method='GET',
                                   **kwargs)

            offset = results.get('query-continue-offset')
            for result in results['query']['results']:
                yield result
Example #4
0
class Site(object):
    """A MediaWiki site identified by its hostname.

        >>> import mwclient
        >>> site = mwclient.Site('en.wikipedia.org')

    Do not include the leading "http://".

    Mwclient assumes that the script path (where index.php and api.php are located)
    is '/w/'. If the site uses a different script path, you must specify this
    (path must end in a '/').

    Examples:

        >>> site = mwclient.Site('vim.wikia.com', path='/')
        >>> site = mwclient.Site('sourceforge.net', path='/apps/mediawiki/mwclient/')

    """
    api_limit = 500

    def __init__(self,
                 host,
                 path='/w/',
                 ext='.php',
                 pool=None,
                 retry_timeout=30,
                 max_retries=25,
                 wait_callback=lambda *x: None,
                 clients_useragent=None,
                 max_lag=3,
                 compress=True,
                 force_login=True,
                 do_init=True,
                 httpauth=None,
                 reqs=None,
                 consumer_token=None,
                 consumer_secret=None,
                 access_token=None,
                 access_secret=None,
                 client_certificate=None,
                 custom_headers=None):
        """Set up the site object and, unless disabled, fetch the site info.

        Args:
            host: Host name of the wiki; `raw_call` also accepts a
                (scheme, host) list or tuple here.
            path: Script path, stored as `self.path`.
            ext: Script extension, stored as `self.ext`.
            pool: Existing connection object to use instead of creating a
                new `requests.Session`.
            retry_timeout: Passed to `Sleepers` together with `max_retries`
                and `wait_callback`.
            max_retries: See `retry_timeout`.
            wait_callback: See `retry_timeout`.
            clients_useragent: Optional prefix for the User-Agent header.
            max_lag: Stored as text in `self.max_lag`.
            compress: Stored as `self.compress`.
            force_login: Stored as `self.force_login`.
            do_init: Call `site_init()` at the end of construction.
            httpauth: A list/tuple of arguments for `HTTPBasicAuth`, an
                `AuthBase` instance, or None.
            reqs: Dictionary stored as `self.requests` (empty if not given).
            consumer_token: With the three following arguments, the OAuth1
                credentials; OAuth is used whenever this is not None.
            consumer_secret: See `consumer_token`.
            access_token: See `consumer_token`.
            access_secret: See `consumer_token`.
            client_certificate: Assigned to the session's `cert` attribute.
            custom_headers: Extra headers merged into the session headers.

        Raises:
            RuntimeError: If `httpauth` is none of the accepted types.
            errors.OAuthAuthorizationError: If the initial request fails
                with 'mwoauth-invalid-authorization'.
            errors.APIError: Any other API error from the initial request,
                except 'unknown_action' and 'readapidenied'.
        """
        # Setup member variables
        self.host = host
        self.path = path
        self.ext = ext
        self.credentials = None
        self.compress = compress
        self.max_lag = text_type(max_lag)
        self.force_login = force_login
        self.requests = reqs or {}

        # Pick the authentication handler: OAuth takes precedence over httpauth.
        if consumer_token is not None:
            auth = OAuth1(consumer_token, consumer_secret, access_token,
                          access_secret)
        elif isinstance(httpauth, (list, tuple)):
            auth = HTTPBasicAuth(*httpauth)
        elif httpauth is None or isinstance(httpauth, (AuthBase, )):
            auth = httpauth
        else:
            raise RuntimeError(
                'Authentication is not a tuple or an instance of AuthBase')

        self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback)

        # Site properties
        self.blocked = False  # Whether current user is blocked
        self.hasmsg = False  # Whether current user has new messages
        self.groups = []  # Groups current user belongs to
        self.rights = []  # Rights current user has
        self.tokens = {}  # Edit tokens of the current user
        self.version = None

        # Placeholder values until site_init() has fetched the real ones.
        self.namespaces = self.default_namespaces
        self.writeapi = False

        # Setup connection
        # Note that auth, client certificate, User-Agent and custom headers
        # are only applied to a session created here, not to a supplied pool.
        if pool is None:
            self.connection = requests.Session()
            self.connection.auth = auth
            if client_certificate:
                self.connection.cert = client_certificate

            prefix = '{} - '.format(
                clients_useragent) if clients_useragent else ''
            self.connection.headers['User-Agent'] = (
                '{prefix}MwClient/{ver} ({url})'.format(
                    prefix=prefix,
                    ver=__ver__,
                    url='https://github.com/mwclient/mwclient'))
            if custom_headers:
                self.connection.headers.update(custom_headers)
        else:
            self.connection = pool

        # Page generators
        self.pages = listing.PageList(self)
        self.categories = listing.PageList(self, namespace=14)
        self.images = listing.PageList(self, namespace=6)

        # Compat page generators
        self.Pages = self.pages
        self.Categories = self.categories
        self.Images = self.images

        # Initialization status
        self.initialized = False

        # Upload chunk size in bytes
        self.chunk_size = 1048576

        if do_init:
            try:
                self.site_init()
            except errors.APIError as e:
                if e.args[0] == 'mwoauth-invalid-authorization':
                    raise errors.OAuthAuthorizationError(e.code, e.info)

                # Private wiki, do init after login
                if e.args[0] not in {u'unknown_action', u'readapidenied'}:
                    raise

    def site_init(self):

        if self.initialized:
            info = self.get('query', meta='userinfo', uiprop='groups|rights')
            userinfo = info['query']['userinfo']
            self.username = userinfo['name']
            self.groups = userinfo.get('groups', [])
            self.rights = userinfo.get('rights', [])
            self.tokens = {}
            return

        meta = self.get('query',
                        meta='siteinfo|userinfo',
                        siprop='general|namespaces',
                        uiprop='groups|rights',
                        retry_on_error=False)

        # Extract site info
        self.site = meta['query']['general']
        self.namespaces = {
            namespace['id']: namespace.get('*', '')
            for namespace in six.itervalues(meta['query']['namespaces'])
        }
        self.writeapi = 'writeapi' in self.site

        self.version = self.version_tuple_from_generator(
            self.site['generator'])

        # Require MediaWiki version >= 1.16
        self.require(1, 16)

        # User info
        userinfo = meta['query']['userinfo']
        self.username = userinfo['name']
        self.groups = userinfo.get('groups', [])
        self.rights = userinfo.get('rights', [])
        self.initialized = True

    @staticmethod
    def version_tuple_from_generator(string, prefix='MediaWiki '):
        """Return a version tuple from a MediaWiki Generator string.

        Example:
            "MediaWiki 1.5.1" → (1, 5, 1)

        Args:
            prefix (str): The expected prefix of the string
        """
        if not string.startswith(prefix):
            raise errors.MediaWikiVersionError(
                'Unknown generator {}'.format(string))

        version = string[len(prefix):].split('.')

        def split_num(s):
            """Split the string on the first non-digit character.

            Returns:
                A tuple of the digit part as int and, if available,
                the rest of the string.
            """
            i = 0
            while i < len(s):
                if s[i] < '0' or s[i] > '9':
                    break
                i += 1
            if s[i:]:
                return (
                    int(s[:i]),
                    s[i:],
                )
            else:
                return (int(s[:i]), )

        version_tuple = sum((split_num(s) for s in version), ())

        if len(version_tuple) < 2:
            raise errors.MediaWikiVersionError('Unknown MediaWiki {}'.format(
                '.'.join(version)))

        return version_tuple

    # Mapping of namespace ID to namespace name. __init__ assigns this to
    # self.namespaces; site_init() replaces it with the namespaces reported
    # by the site itself.
    default_namespaces = {
        0: u'',
        1: u'Talk',
        2: u'User',
        3: u'User talk',
        4: u'Project',
        5: u'Project talk',
        6: u'Image',
        7: u'Image talk',
        8: u'MediaWiki',
        9: u'MediaWiki talk',
        10: u'Template',
        11: u'Template talk',
        12: u'Help',
        13: u'Help talk',
        14: u'Category',
        15: u'Category talk',
        -1: u'Special',
        -2: u'Media'
    }

    def __repr__(self):
        return "<Site object '%s%s'>" % (self.host, self.path)

    def get(self, action, *args, **kwargs):
        """Perform a generic API call using GET.

        This is just a shorthand for calling api() with http_method='GET'.
        All arguments will be passed on.

        Returns:
            The raw response from the API call, as a dictionary.
        """
        return self.api(action, 'GET', *args, **kwargs)

    def post(self, action, *args, **kwargs):
        """Perform a generic API call using POST.

        This is just a shorthand for calling api() with http_method='POST'.
        All arguments will be passed on.

        Returns:
            The raw response from the API call, as a dictionary.
        """
        return self.api(action, 'POST', *args, **kwargs)

    def api(self, action, http_method='POST', *args, **kwargs):
        """Perform a generic API call and handle errors.

        All arguments will be passed on.

        Example:
            To get coordinates from the GeoData MediaWiki extension at English Wikipedia:

            >>> site = Site('en.wikipedia.org')
            >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen')
            >>> for page in result['query']['pages'].values():
            ...     if 'coordinates' in page:
            ...         print '{} {} {}'.format(page['title'],
            ...             page['coordinates'][0]['lat'],
            ...             page['coordinates'][0]['lon'])
            Oslo 59.95 10.75
            Copenhagen 55.6761 12.5683

        Returns:
            The raw response from the API call, as a dictionary.
        """
        kwargs.update(args)

        if action == 'query' and 'continue' not in kwargs:
            kwargs['continue'] = ''
        if action == 'query':
            if 'meta' in kwargs:
                kwargs['meta'] += '|userinfo'
            else:
                kwargs['meta'] = 'userinfo'
            if 'uiprop' in kwargs:
                kwargs['uiprop'] += '|blockinfo|hasmsg'
            else:
                kwargs['uiprop'] = 'blockinfo|hasmsg'

        sleeper = self.sleepers.make()

        while True:
            info = self.raw_api(action, http_method, **kwargs)
            if not info:
                info = {}
            if self.handle_api_result(info, sleeper=sleeper):
                return info

    def handle_api_result(self, info, kwargs=None, sleeper=None):
        if sleeper is None:
            sleeper = self.sleepers.make()

        try:
            userinfo = info['query']['userinfo']
        except KeyError:
            userinfo = ()
        if 'blockedby' in userinfo:
            self.blocked = (userinfo['blockedby'],
                            userinfo.get('blockreason', u''))
        else:
            self.blocked = False
        self.hasmsg = 'messages' in userinfo
        self.logged_in = 'anon' not in userinfo
        if 'warnings' in info:
            for module, warning in info['warnings'].items():
                if '*' in warning:
                    log.warning(warning['*'])

        if 'error' in info:
            if info['error'].get('code') in {
                    u'internal_api_error_DBConnectionError',
                    u'internal_api_error_DBQueryError'
            }:
                sleeper.sleep()
                return False

            # cope with https://phabricator.wikimedia.org/T106066
            if (info['error'].get('code') == u'mwoauth-invalid-authorization'
                    and 'Nonce already used' in info['error'].get('info')):
                log.warning(
                    'retrying due to nonce error https://phabricator.wikimedia.org/T106066'
                )
                sleeper.sleep()
                return False

            if 'query' in info['error']:
                # Semantic Mediawiki does not follow the standard error format
                raise errors.APIError(None, info['error']['query'], kwargs)

            if '*' in info['error']:
                raise errors.APIError(info['error']['code'],
                                      info['error']['info'],
                                      info['error']['*'])
            raise errors.APIError(info['error']['code'], info['error']['info'],
                                  kwargs)
        return True

    @staticmethod
    def _query_string(*args, **kwargs):
        """Collect the request parameters into an OrderedDict, tokens last.

        Positional `args` are (key, value) pairs merged into `kwargs`. The
        'wpEditToken' and 'token' entries are moved to the end of the result.
        """
        kwargs.update(args)
        token_keys = {'wpEditToken', 'token'}
        regular, tokens = [], []
        for key, value in six.iteritems(kwargs):
            bucket = tokens if key in token_keys else regular
            bucket.append((key, value))
        return OrderedDict(regular + tokens)

    def raw_call(self,
                 script,
                 data,
                 files=None,
                 retry_on_error=True,
                 http_method='POST'):
        """
        Perform a generic request and return the raw text.

        In the event of a network problem, or a HTTP response with status code 5XX,
        we'll wait and retry the configured number of times before giving up
        if `retry_on_error` is True.

        `requests.exceptions.HTTPError` is still raised directly for
        HTTP responses with status codes in the 4XX range, and invalid
        HTTP responses.

        Args:
            script (str): Script name, usually 'api'.
            data (dict): Post data
            files (dict): Files to upload
            retry_on_error (bool): Retry on connection error
            http_method (str): 'GET' sends `data` as URL parameters; any
                other value sends it as the body of a POST request.

        Returns:
            The raw text response.
        """
        headers = {}
        if self.compress and gzip:
            headers['Accept-Encoding'] = 'gzip'
        sleeper = self.sleepers.make((script, data))

        # The scheme defaults to https; a (scheme, host) pair in self.host
        # overrides it.
        scheme = 'https'
        host = self.host
        if isinstance(host, (list, tuple)):
            scheme, host = host

        url = '{scheme}://{host}{path}{script}{ext}'.format(scheme=scheme,
                                                            host=host,
                                                            path=self.path,
                                                            script=script,
                                                            ext=self.ext)

        # Loop until a 200 response is returned or an exception escapes
        # (from the request, raise_for_status() or the sleeper).
        while True:
            try:
                if http_method == 'GET':
                    stream = self.connection.get(url,
                                                 params=data,
                                                 files=files,
                                                 headers=headers,
                                                 **self.requests)
                else:
                    stream = self.connection.post(url,
                                                  data=data,
                                                  files=files,
                                                  headers=headers,
                                                  **self.requests)
                if stream.headers.get('x-database-lag'):
                    # NOTE(review): assumes a 'retry-after' header always
                    # accompanies 'x-database-lag'; int(None) would raise
                    # TypeError otherwise.
                    wait_time = int(stream.headers.get('retry-after'))
                    log.warning('Database lag exceeds max lag. '
                                'Waiting for {} seconds'.format(wait_time))
                    sleeper.sleep(wait_time)
                elif stream.status_code == 200:
                    return stream.text
                elif stream.status_code < 500 or stream.status_code > 599:
                    # NOTE(review): raise_for_status() only raises for 4XX/5XX,
                    # so a non-200 success or redirect status falls through
                    # and the request is repeated without sleeping - confirm
                    # this is intended.
                    stream.raise_for_status()
                else:
                    # 5XX: raise right away unless retrying was requested.
                    if not retry_on_error:
                        stream.raise_for_status()
                    log.warning('Received {status} response: {text}. '
                                'Retrying in a moment.'.format(
                                    status=stream.status_code,
                                    text=stream.text))
                    sleeper.sleep()

            except requests.exceptions.ConnectionError:
                # In the event of a network problem
                # (e.g. DNS failure, refused connection, etc),
                # Requests will raise a ConnectionError exception.
                if not retry_on_error:
                    raise
                log.warning('Connection error. Retrying in a moment.')
                sleeper.sleep()

    def raw_api(self, action, http_method='POST', *args, **kwargs):
        """Send a call to the API."""
        try:
            retry_on_error = kwargs.pop('retry_on_error')
        except KeyError:
            retry_on_error = True
        kwargs['action'] = action
        kwargs['format'] = 'json'
        data = self._query_string(*args, **kwargs)
        res = self.raw_call('api',
                            data,
                            retry_on_error=retry_on_error,
                            http_method=http_method)

        try:
            return json.loads(res, object_pairs_hook=OrderedDict)
        except ValueError:
            if res.startswith('MediaWiki API is not enabled for this site.'):
                raise errors.APIDisabledError
            raise errors.InvalidResponse(res)

    def raw_index(self, action, http_method='POST', *args, **kwargs):
        """Sends a call to index.php rather than the API."""
        kwargs['action'] = action
        kwargs['maxlag'] = self.max_lag
        data = self._query_string(*args, **kwargs)
        return self.raw_call('index', data, http_method=http_method)

    def require(self, major, minor, revision=None, raise_error=True):
        if self.version is None:
            if raise_error is None:
                return
            raise RuntimeError('Site %s has not yet been initialized' %
                               repr(self))

        if revision is None:
            if self.version[:2] >= (major, minor):
                return True
            elif raise_error:
                raise errors.MediaWikiVersionError(
                    'Requires version {required[0]}.{required[1]}, '
                    'current version is {current[0]}.{current[1]}'.format(
                        required=(major, minor), current=(self.version[:2])))
            else:
                return False
        else:
            raise NotImplementedError

    # Actions
    def email(self, user, text, subject, cc=False):
        """
        Send email to a specified user on the wiki.

            >>> try:
            ...     site.email('SomeUser', 'Some message', 'Some subject')
            ... except mwclient.errors.NoSpecifiedEmail as e:
            ...     print('The user does not accept email, or has not specified an email address.')

        Args:
            user (str): User name of the recipient
            text (str): Body of the email
            subject (str): Subject of the email
            cc (bool): True to send a copy of the email to yourself (default is False)

        Returns:
            Dictionary of the JSON response

        Raises:
            NoSpecifiedEmail (mwclient.errors.NoSpecifiedEmail): if the API
                reports the 'noemail' error for the recipient
            EmailError (mwclient.errors.EmailError): on other errors
        """

        token = self.get_token('email')

        params = {
            'target': user,
            'subject': subject,
            'text': text,
            'token': token,
        }
        # MediaWiki treats a boolean parameter as true whenever it is present,
        # whatever its value, so 'ccme' must be left out entirely for cc=False.
        if cc:
            params['ccme'] = '1'

        try:
            info = self.post('emailuser', **params)
        except errors.APIError as e:
            if e.args[0] == u'noemail':
                raise errors.NoSpecifiedEmail(user, e.args[1])
            # Exceptions are not iterable on Python 3; unpack their args.
            raise errors.EmailError(*e.args)

        return info

    def login(self, username=None, password=None, cookies=None, domain=None):
        """Login to the wiki."""

        if username and password:
            self.credentials = (username, password, domain)
        if cookies:
            self.connection.cookies.update(cookies)

        if self.credentials:
            sleeper = self.sleepers.make()
            kwargs = {
                'lgname': self.credentials[0],
                'lgpassword': self.credentials[1]
            }
            if self.credentials[2]:
                kwargs['lgdomain'] = self.credentials[2]

            # Try to login using the scheme for MW 1.27+. If the wiki is read protected,
            # it is not possible to get the wiki version upfront using the API, so we just
            # have to try. If the attempt fails, we try the old method.
            try:
                kwargs['lgtoken'] = self.get_token('login')
            except (errors.APIError, KeyError):
                log.debug(
                    'Failed to get login token, MediaWiki is older than 1.27.')

            while True:
                login = self.post('login', **kwargs)

                if login['login']['result'] == 'Success':
                    break
                elif login['login']['result'] == 'NeedToken':
                    kwargs['lgtoken'] = login['login']['token']
                elif login['login']['result'] == 'Throttled':
                    sleeper.sleep(int(login['login'].get('wait', 5)))
                else:
                    raise errors.LoginError(self, login['login'])

        self.site_init()

    def get_token(self, type, force=False, title=None):

        if self.version is None or self.version[:2] >= (1, 24):
            # The 'csrf' (cross-site request forgery) token introduced in 1.24 replaces
            # the majority of older tokens, like edittoken and movetoken.
            if type not in {
                    'watch', 'patrol', 'rollback', 'userrights', 'login'
            }:
                type = 'csrf'

        if type not in self.tokens:
            self.tokens[type] = '0'

        if self.tokens.get(type, '0') == '0' or force:

            if self.version is None or self.version[:2] >= (1, 24):
                # We use raw_api() rather than api() because api() is adding "userinfo"
                # to the query and this raises an readapideniederror if the wiki is read
                # protected and we're trying to fetch a login token.
                info = self.raw_api('query', 'GET', meta='tokens', type=type)

                self.handle_api_result(info)

                # Note that for read protected wikis, we don't know the version when
                # fetching the login token. If it's < 1.27, the request below will
                # raise a KeyError that we should catch.
                self.tokens[type] = info['query']['tokens']['%stoken' % type]

            else:
                if title is None:
                    # Some dummy title was needed to get a token prior to 1.24
                    title = 'Test'
                info = self.post('query',
                                 titles=title,
                                 prop='info',
                                 intoken=type)
                for i in six.itervalues(info['query']['pages']):
                    if i['title'] == title:
                        self.tokens[type] = i['%stoken' % type]

        return self.tokens[type]

    def upload(self,
               file=None,
               filename=None,
               description='',
               ignore=False,
               file_size=None,
               url=None,
               filekey=None,
               comment=None):
        """Upload a file to the site.

        Note that one of `file`, `filekey` and `url` must be specified, but not
        more than one. For normal uploads, you specify `file`.

        Args:
            file: File object or stream to upload. Anything without a `read`
                  attribute is treated as a path and opened in binary mode.
            filename (str): Destination filename, don't include namespace
                            prefix like 'File:'
            description (str): Wikitext for the file description page.
            ignore (bool): True to upload despite any warnings.
            file_size (int): Deprecated in mwclient 0.7
            url (str): URL to fetch the file from.
            filekey (str): Key that identifies a previous upload that was
                           stashed temporarily.
            comment (str): Upload comment. Also used as the initial page text
                           for new files if `description` is not specified.

        Example:

            >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg',
                              description='Some description')

        Returns:
            The ``upload`` part of the JSON result from the API (an empty dict
            if the response has no such key). The file object is closed once
            the upload request has been handled.

        Raises:
            TypeError: if `filename` is missing, or if not exactly one of
                `file`, `filekey` and `url` is given.
            errors.InsufficientPermission
            requests.exceptions.HTTPError
        """

        if file_size is not None:
            # Note that DeprecationWarning is hidden by default since Python 2.7
            warnings.warn('file_size is deprecated since mwclient 0.7',
                          DeprecationWarning)

        if filename is None:
            raise TypeError('filename must be specified')

        if len([x for x in [file, filekey, url] if x is not None]) != 1:
            raise TypeError(
                "exactly one of 'file', 'filekey' and 'url' must be specified")

        image = self.Images[filename]
        if not image.can('upload'):
            raise errors.InsufficientPermission(filename)

        if comment is None:
            # No explicit comment: the description doubles as upload comment
            comment = description
            text = None
        else:
            text = description

        if file is not None:
            if not hasattr(file, 'read'):
                file = open(file, 'rb')

            # Do not rely on the return value of seek(): it is None for
            # Python 2 file objects and for some file-like wrappers.
            file.seek(0, 2)
            content_size = file.tell()
            file.seek(0)

            if self.version[:2] >= (1, 20) and content_size > self.chunk_size:
                return self.chunk_upload(file, filename, ignore, comment, text)

        predata = {
            'action': 'upload',
            'format': 'json',
            'filename': filename,
            'comment': comment,
            'text': text,
            'token': image.get_token('edit'),
        }

        if ignore:
            predata['ignorewarnings'] = 'true'
        if url:
            predata['url'] = url

        # sessionkey was renamed to filekey in MediaWiki 1.18
        # https://phabricator.wikimedia.org/rMW5f13517e36b45342f228f3de4298bb0fe186995d
        if self.version[:2] < (1, 18):
            predata['sessionkey'] = filekey
        else:
            predata['filekey'] = filekey

        postdata = predata
        files = None
        if file is not None:

            # Workaround for https://github.com/mwclient/mwclient/issues/65
            # ----------------------------------------------------------------
            # Since the filename in Content-Disposition is not interpreted,
            # we can send some ascii-only dummy name rather than the real
            # filename, which might contain non-ascii.
            files = {'file': ('fake-filename', file)}

        sleeper = self.sleepers.make()
        while True:
            data = self.raw_call('api', postdata, files)
            info = json.loads(data)
            if not info:
                info = {}
            # handle_api_result returns a false value when the call should
            # be retried, so keep looping until it accepts the response.
            if self.handle_api_result(info, kwargs=predata, sleeper=sleeper):
                response = info.get('upload', {})
                break
        if file is not None:
            file.close()
        return response

    def chunk_upload(self, file, filename, ignorewarnings, comment, text):
        """Upload a file to the site in chunks.

        This method is called by `Site.upload` if you are connecting to a newer
        MediaWiki installation, so it's normally not necessary to call this
        method directly.

        Each chunk is sent to the upload stash; once the API reports
        ``Success`` for the last chunk, a final ``upload`` request publishes
        the stashed file with the given comment and text.

        Args:
            file (file-like object): File object or stream to upload.
            filename (str): Destination filename.
            ignorewarnings (bool): True to send ``ignorewarnings`` with the
                                   upload requests.
            comment (str): Upload comment sent with the final request.
            text (str): Page text sent with the final request.

        Returns:
            The result of the final ``upload`` request, or the ``upload`` part
            of a chunk response whose result is neither ``Continue`` nor
            ``Success``.
        """
        image = self.Images[filename]

        # Do not rely on the return value of seek(): it is None for Python 2
        # file objects and for some file-like wrappers.
        file.seek(0, 2)
        content_size = file.tell()
        file.seek(0)

        params = {
            'action': 'upload',
            'format': 'json',
            'stash': 1,
            'offset': 0,
            'filename': filename,
            'filesize': content_size,
            'token': image.get_token('edit'),
        }
        if ignorewarnings:
            params['ignorewarnings'] = 'true'

        sleeper = self.sleepers.make()
        offset = 0
        for chunk in read_in_chunks(file, self.chunk_size):
            # Resend the same chunk until handle_api_result accepts the reply
            while True:
                data = self.raw_call('api', params, files={'chunk': chunk})
                info = json.loads(data)
                if self.handle_api_result(info, kwargs=params,
                                          sleeper=sleeper):
                    response = info.get('upload', {})
                    break

            offset += chunk.tell()
            chunk.close()
            log.debug('%s: Uploaded %d of %d bytes', filename, offset,
                      content_size)
            params['filekey'] = response['filekey']
            if response['result'] == 'Continue':
                params['offset'] = response['offset']
            elif response['result'] == 'Success':
                file.close()
                break
            else:
                # Some kind of error or warning occurred. In any case, we do
                # not get the parameters we need to continue, so we should
                # return the response now.
                file.close()
                return response

        # Reuse the accumulated parameters (including the filekey) for the
        # final request, minus the ones that only apply to stashing chunks.
        del params['action']
        del params['stash']
        del params['offset']
        params['comment'] = comment
        params['text'] = text
        return self.post('upload', **params)

    def parse(self,
              text=None,
              title=None,
              page=None,
              prop=None,
              redirects=False,
              mobileformat=False):
        """Send a ``parse`` request and return its ``parse`` result.

        Args:
            text, title, page, prop: Forwarded under the same name when they
                are not None.
            redirects: When true, ``redirects='1'`` is sent.
            mobileformat: When true, ``mobileformat='1'`` is sent.

        Returns:
            The value stored under the ``parse`` key of the response.
        """
        optional = (('text', text), ('title', title),
                    ('page', page), ('prop', prop))
        request = {}
        for name, value in optional:
            if value is not None:
                request[name] = value

        # Boolean switches are transmitted as the string '1'
        for name, enabled in (('redirects', redirects),
                              ('mobileformat', mobileformat)):
            if enabled:
                request[name] = '1'

        return self.post('parse', **request)['parse']

    # def block(self): TODO?
    # def unblock: TODO?
    # def patrol: TODO?
    # def import: TODO?

    # Lists
    def allpages(self, start=None, prefix=None, namespace='0',
                 filterredir='all', minsize=None, maxsize=None, prtype=None,
                 prlevel=None, limit=None, dir='ascending',
                 filterlanglinks='all', generator=True, end=None):
        """Retrieve all pages on the wiki through the ``allpages`` list.

        ``start`` and ``end`` are handed over as the ``from`` and ``to``
        parameters. ``generator`` selects both the parameter prefix (via
        ``List.get_prefix``) and the list class (via ``List.get_list``).

        Returns:
            The listing object built for ``allpages``, configured with
            ``return_values='title'``.
        """
        param_prefix = listing.List.get_prefix('ap', generator)
        request_args = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), ('to', end),
            prefix=prefix, minsize=minsize, maxsize=maxsize,
            prtype=prtype, prlevel=prlevel, namespace=namespace,
            filterredir=filterredir, dir=dir,
            filterlanglinks=filterlanglinks))

        list_class = listing.List.get_list(generator)
        return list_class(self, 'allpages', 'ap', limit=limit,
                          return_values='title', **request_args)

    def allimages(self, start=None, prefix=None, minsize=None, maxsize=None,
                  limit=None, dir='ascending', sha1=None, sha1base36=None,
                  generator=True, end=None):
        """Retrieve all images on the wiki through the ``allimages`` list.

        ``start`` and ``end`` are handed over as the ``from`` and ``to``
        parameters. ``generator`` selects both the parameter prefix and the
        list class.

        Returns:
            The listing object built for ``allimages``, configured with
            ``return_values='timestamp|url'``.
        """
        param_prefix = listing.List.get_prefix('ai', generator)
        request_args = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), ('to', end),
            prefix=prefix, minsize=minsize, maxsize=maxsize, dir=dir,
            sha1=sha1, sha1base36=sha1base36))

        list_class = listing.List.get_list(generator)
        return list_class(self, 'allimages', 'ai', limit=limit,
                          return_values='timestamp|url', **request_args)

    def alllinks(self, start=None, prefix=None, unique=False, prop='title',
                 namespace='0', limit=None, generator=True, end=None):
        """Retrieve all links on the wiki through the ``alllinks`` list.

        ``start`` and ``end`` are handed over as the ``from`` and ``to``
        parameters. When ``unique`` is true, the prefixed ``unique``
        parameter is set to ``'1'``.

        Returns:
            The listing object built for ``alllinks``, configured with
            ``return_values='title'``.
        """
        param_prefix = listing.List.get_prefix('al', generator)
        request_args = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), ('to', end),
            prefix=prefix, prop=prop, namespace=namespace))
        if unique:
            request_args[param_prefix + 'unique'] = '1'

        list_class = listing.List.get_list(generator)
        return list_class(self, 'alllinks', 'al', limit=limit,
                          return_values='title', **request_args)

    def allcategories(self, start=None, prefix=None, dir='ascending',
                      limit=None, generator=True, end=None):
        """Retrieve all categories through the ``allcategories`` list.

        ``start`` and ``end`` are handed over as the ``from`` and ``to``
        parameters. ``generator`` selects both the parameter prefix and the
        list class.
        """
        param_prefix = listing.List.get_prefix('ac', generator)
        request_args = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), ('to', end),
            prefix=prefix, dir=dir))

        list_class = listing.List.get_list(generator)
        return list_class(self, 'allcategories', 'ac', limit=limit,
                          **request_args)

    def allusers(self, start=None, prefix=None, group=None, prop=None,
                 limit=None, witheditsonly=False, activeusers=False,
                 rights=None, end=None):
        """Retrieve all users on the wiki through the ``allusers`` list.

        ``start`` and ``end`` are handed over as the ``from`` and ``to``
        parameters; the remaining filters keep their own names, all under
        the ``au`` prefix.

        Returns:
            A ``listing.List`` for ``allusers``.
        """
        request_args = dict(listing.List.generate_kwargs(
            'au', ('from', start), ('to', end),
            prefix=prefix, group=group, prop=prop, rights=rights,
            witheditsonly=witheditsonly, activeusers=activeusers))
        return listing.List(self, 'allusers', 'au', limit=limit,
                            **request_args)

    def blocks(self, start=None, end=None, dir='older', ids=None, users=None,
               limit=None, prop='id|user|by|timestamp|expiry|reason|flags'):
        """Retrieve blocks through the ``blocks`` list.

        Each block is a dictionary containing:

        - user: the username or IP address of the user
        - id: the ID of the block
        - timestamp: when the block was added
        - expiry: when the block runs out (infinity for indefinite blocks)
        - reason: the reason they are blocked
        - allowusertalk: key is present (empty string) if the user is
          allowed to edit their user talk page
        - by: the administrator who blocked the user
        - nocreate: key is present (empty string) if the user's ability to
          create accounts has been disabled.

        Returns:
            A ``listing.List`` for ``blocks``.
        """
        # TODO: an unspecified "Fix" note has been attached to this method
        filters = dict(start=start, end=end, dir=dir, ids=ids, users=users,
                       prop=prop)
        request_args = dict(listing.List.generate_kwargs('bk', **filters))
        return listing.List(self, 'blocks', 'bk', limit=limit, **request_args)

    def deletedrevisions(self, start=None, end=None, dir='older',
                         namespace=None, limit=None, prop='user|comment'):
        """Retrieve deleted revisions through the ``deletedrevs`` list.

        All filters are handed over under the ``dr`` prefix.

        Returns:
            A ``listing.List`` for ``deletedrevs``.
        """
        # TODO: an unspecified "Fix" note has been attached to this method
        filters = dict(start=start, end=end, dir=dir, namespace=namespace,
                       prop=prop)
        request_args = dict(listing.List.generate_kwargs('dr', **filters))
        return listing.List(self, 'deletedrevs', 'dr', limit=limit,
                            **request_args)

    def exturlusage(self, query, prop=None, protocol='http', namespace=None,
                    limit=None):
        r"""Retrieve the pages that link to a particular domain or URL.

        This API call mirrors the Special:LinkSearch function on-wiki.

        Query can be a domain like 'bbc.co.uk'.
        Wildcards can be used, e.g. '\*.bbc.co.uk'.
        Alternatively, a query can contain a full domain name and some or all
        of a URL: e.g. '\*.wikipedia.org/wiki/\*'

        See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details.

        The listing yields dictionaries containing four keys:
        - url: the URL linked to.
        - ns: namespace of the wiki page
        - pageid: the ID of the wiki page
        - title: the page title.

        """
        filters = dict(query=query, prop=prop, protocol=protocol,
                       namespace=namespace)
        request_args = dict(listing.List.generate_kwargs('eu', **filters))
        return listing.List(self, 'exturlusage', 'eu', limit=limit,
                            **request_args)

    def logevents(self, type=None, prop=None, start=None, end=None,
                  dir='older', user=None, title=None, limit=None,
                  action=None):
        """Retrieve log events through the ``logevents`` list.

        All filters are handed over under the ``le`` prefix.

        Returns:
            A ``listing.List`` for ``logevents``.
        """
        filters = dict(prop=prop, type=type, start=start, end=end, dir=dir,
                       user=user, title=title, action=action)
        request_args = dict(listing.List.generate_kwargs('le', **filters))
        return listing.List(self, 'logevents', 'le', limit=limit,
                            **request_args)

    def checkuserlog(self, user=None, target=None, limit=10, dir='older',
                     start=None, end=None):
        """Retrieve checkuserlog items through the ``checkuserlog`` list.

        All filters are handed over under the ``cul`` prefix.

        Returns:
            A ``listing.NestedList`` created with ``'entries'`` as its first
            argument.
        """
        filters = dict(target=target, start=start, end=end, dir=dir,
                       user=user)
        request_args = dict(listing.List.generate_kwargs('cul', **filters))
        return listing.NestedList('entries', self, 'checkuserlog', 'cul',
                                  limit=limit, **request_args)

    # def protectedtitles requires 1.15
    def random(self, namespace, limit=20):
        """Retrieve random pages from a particular namespace.

        Args:
            namespace: Namespace identifier integer.
            limit: Number of random articles retrieved (default 20).

        Returns:
            A ``listing.List`` for ``random``; items are dictionaries with
            namespace, page ID and title.
        """
        request_args = dict(
            listing.List.generate_kwargs('rn', namespace=namespace))
        return listing.List(self, 'random', 'rn', limit=limit, **request_args)

    def recentchanges(self, start=None, end=None, dir='older', namespace=None,
                      prop=None, show=None, limit=None, type=None,
                      toponly=None):
        """List recent changes to the wiki, à la Special:Recentchanges.

        All filters are handed over under the ``rc`` prefix. ``toponly`` is
        sent as ``'1'`` when true and as None otherwise.

        Returns:
            A ``listing.List`` for ``recentchanges``.
        """
        if toponly:
            toponly_flag = '1'
        else:
            toponly_flag = None

        request_args = dict(listing.List.generate_kwargs(
            'rc', start=start, end=end, dir=dir, namespace=namespace,
            prop=prop, show=show, type=type, toponly=toponly_flag))
        return listing.List(self, 'recentchanges', 'rc', limit=limit,
                            **request_args)

    def revisions(self,
                  revids,
                  prop='ids|timestamp|flags|comment|user',
                  expandtemplates=False,
                  diffto='prev'):
        """Get data about a list of revisions.

        See also the `Page.revisions()` method.

        API doc: https://www.mediawiki.org/wiki/API:Revisions

        Example: Get revision text for two revisions:

            >>> for revision in site.revisions([689697696, 689816909], prop='content'):
            ...     print(revision['*'])

        Args:
            revids (list): A list of (max 50) revisions.
            prop (str): Which properties to get for each revision.
            expandtemplates (bool): Expand templates in `rvprop=content` output.
            diffto (str): Revision ID to diff each revision to. Use "prev",
                          "next" and "cur" for the previous, next and current
                          revision respectively.

        Returns:
            A list of revisions. Each revision dict additionally gets the
            `pageid` and `pagetitle` of its page; if it carries a `timestamp`
            (i.e. when `prop` requests it), that value is replaced by the
            result of `parse_timestamp`.
        """
        kwargs = {
            'prop': 'revisions',
            'rvprop': prop,
            'revids': '|'.join(map(text_type, revids))
        }
        if expandtemplates:
            kwargs['rvexpandtemplates'] = '1'
        if diffto:
            kwargs['rvdiffto'] = diffto

        revisions = []
        pages = self.get('query', **kwargs).get('query', {}).get('pages',
                                                                 {}).values()
        for page in pages:
            for revision in page.get('revisions', ()):
                revision['pageid'] = page.get('pageid')
                revision['pagetitle'] = page.get('title')
                # The API only returns the properties listed in `prop`, so
                # the timestamp is absent for e.g. prop='content'.
                if 'timestamp' in revision:
                    revision['timestamp'] = parse_timestamp(
                        revision['timestamp'])
                revisions.append(revision)
        return revisions

    def search(self, search, namespace='0', what=None, redirects=False,
               limit=None):
        """Perform a full text search.

        API doc: https://www.mediawiki.org/wiki/API:Search

        Example:
            >>> for result in site.search('prefix:Template:Citation/'):
            ...     print(result.get('title'))

        Args:
            search (str): The query string
            namespace (int): The namespace to search (default: 0)
            what (str): Search scope: 'text' for fulltext, or 'title' for
                        titles only. Depending on the search backend, both
                        options may not be available. For instance
                        `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_
                        doesn't support 'title', but instead provides an
                        "intitle:" query string filter.
            redirects (bool): Include redirect pages in the search
                              (option removed in MediaWiki 1.23).
            limit: Passed on to the listing as its ``limit``.

        Returns:
            mwclient.listings.List: Search results iterator
        """
        request_args = dict(listing.List.generate_kwargs(
            'sr', search=search, namespace=namespace, what=what))
        if redirects:
            request_args['srredirects'] = '1'
        return listing.List(self, 'search', 'sr', limit=limit, **request_args)

    def usercontributions(self, user, start=None, end=None, dir='older',
                          namespace=None, prop=None, show=None, limit=None):
        """
        List the contributions made by a given user to the wiki, à la
        Special:Contributions.

        API doc: https://www.mediawiki.org/wiki/API:Usercontribs

        All filters are handed over under the ``uc`` prefix.

        Returns:
            A ``listing.List`` for ``usercontribs``.
        """
        filters = dict(user=user, start=start, end=end, dir=dir,
                       namespace=namespace, prop=prop, show=show)
        request_args = dict(listing.List.generate_kwargs('uc', **filters))
        return listing.List(self, 'usercontribs', 'uc', limit=limit,
                            **request_args)

    def users(self, users, prop='blockinfo|groups|editcount'):
        """
        Get information about a list of users.

        API doc: https://www.mediawiki.org/wiki/API:Users

        Args:
            users: Iterable of user name strings; they are joined with ``|``.
            prop (str): Sent as ``usprop``.

        Returns:
            A ``listing.List`` for ``users``.
        """
        joined_names = '|'.join(users)
        return listing.List(self, 'users', 'us', ususers=joined_names,
                            usprop=prop)

    def watchlist(self, allrev=False, start=None, end=None, namespace=None,
                  dir='older', prop=None, show=None, limit=None):
        """
        List the pages on the current user's watchlist.

        API doc: https://www.mediawiki.org/wiki/API:Watchlist

        All filters are handed over under the ``wl`` prefix. When ``allrev``
        is true, ``wlallrev='1'`` is added.

        Returns:
            A ``listing.List`` for ``watchlist``.
        """
        filters = dict(start=start, end=end, namespace=namespace, dir=dir,
                       prop=prop, show=show)
        request_args = dict(listing.List.generate_kwargs('wl', **filters))
        if allrev:
            request_args['wlallrev'] = '1'
        return listing.List(self, 'watchlist', 'wl', limit=limit,
                            **request_args)

    def expandtemplates(self, text, title=None, generatexml=False):
        """
        Takes wikitext (text) and expands templates.

        API doc: https://www.mediawiki.org/wiki/API:Expandtemplates

        Args:
            text (str): Wikitext to expand.
            title (str): Page title to send along; omitted from the request
                         when None.
            generatexml (bool): When true, ``generatexml='1'`` is sent and
                                the parse tree is returned as well.

        Returns:
            The expanded wikitext, or a tuple ``(expanded wikitext,
            parse tree)`` when `generatexml` is true.
        """

        kwargs = {}
        # Only forward the title when the caller actually supplied one
        if title is not None:
            kwargs['title'] = title
        if generatexml:
            kwargs['generatexml'] = '1'

        result = self.get('expandtemplates', text=text, **kwargs)

        if generatexml:
            return result['expandtemplates']['*'], result['parsetree']['*']
        else:
            return result['expandtemplates']['*']

    def ask(self, query, title=None):
        """
        Ask a query against Semantic MediaWiki.

        API doc: https://semantic-mediawiki.org/wiki/Ask_API

        Args:
            query (str): The ask query. An ``|offset=N`` suffix is appended
                         for every request.
            title (str): Sent as ``title`` with each request; omitted when
                         None.

        Returns:
            Generator for retrieving all search results, with each answer as a dictionary.
            If the query is invalid, an APIError is raised. A valid query with zero
            results will not raise any error.

        Examples:

            >>> query = "[[Category:my cat]]|[[Has name::a name]]|?Has property"
            >>> for answer in site.ask(query):
            >>>     for title, data in answer.items():
            >>>         print(title)
            >>>         print(data)
        """
        kwargs = {}
        # Only forward the title when the caller actually supplied one
        if title is not None:
            kwargs['title'] = title

        offset = 0
        # Keep requesting until the response no longer carries a
        # continuation offset.
        while offset is not None:
            results = self.raw_api('ask',
                                   query=u'{query}|offset={offset}'.format(
                                       query=query, offset=offset),
                                   http_method='GET',
                                   **kwargs)
            self.handle_api_result(results)  # raises APIError on error
            offset = results.get('query-continue-offset')
            answers = results['query'].get('results') or {}
            for key, value in answers.items():
                yield {key: value}
Example #5
0
class Site(object):
    api_limit = 500

    def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30,
                 max_retries=25, wait_callback=lambda *x: None, clients_useragent=None,
                 max_lag=3, compress=True, force_login=True, do_init=True, httpauth=None):
        """Set up the site object and, unless disabled, load the site info.

        Args:
            host: Host of the wiki.
            path: Path prefix of the scripts (default '/w/').
            ext: Script extension (default '.php').
            pool: Object to use as ``self.connection``; when None a new
                ``requests.Session`` is created and configured.
            retry_timeout, max_retries, wait_callback: Passed to ``Sleepers``.
            clients_useragent: Optional text put in front of the default
                User-Agent header (only applied to a newly created session).
            max_lag: Stored as text in ``self.max_lag``.
            compress: Stored in ``self.compress``.
            force_login: Stored in ``self.force_login``.
            do_init: When true, ``site_init()`` is called right away.
            httpauth: A list/tuple of arguments for ``HTTPBasicAuth``, an
                ``AuthBase`` instance, or None.

        Raises:
            RuntimeError: if ``httpauth`` has none of the accepted types.
            errors.APIError: from ``site_init()``, unless the error code is
                ``unknown_action`` or ``readapidenied``.
        """
        # Basic settings
        self.host = host
        self.path = path
        self.ext = ext
        self.credentials = None
        self.compress = compress
        self.max_lag = text_type(max_lag)
        self.force_login = force_login

        # HTTP authentication: a sequence is turned into basic auth, an
        # AuthBase instance (or None) is used untouched.
        if isinstance(httpauth, (list, tuple)):
            self.httpauth = HTTPBasicAuth(*httpauth)
        elif httpauth is None or isinstance(httpauth, AuthBase):
            self.httpauth = httpauth
        else:
            raise RuntimeError('Authentication is not a tuple or an instance of AuthBase')

        self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback)

        # State of the current user and the site
        self.blocked = False  # False, or set by handle_api_result
        self.hasmsg = False   # whether the user has new messages
        self.groups = []      # groups of the current user
        self.rights = []      # rights of the current user
        self.tokens = {}      # edit tokens of the current user
        self.version = None

        self.namespaces = self.default_namespaces
        self.writeapi = False

        # Connection: reuse the given pool or build a session of our own
        if pool is not None:
            self.connection = pool
        else:
            user_agent = 'MwClient/' + __ver__ + ' (https://github.com/mwclient/mwclient)'
            if clients_useragent:
                user_agent = clients_useragent + ' - ' + user_agent
            self.connection = requests.Session()
            self.connection.auth = self.httpauth
            self.connection.headers['User-Agent'] = user_agent

        # Page generators, each also reachable under its capitalised
        # compatibility name
        self.pages = self.Pages = listing.PageList(self)
        self.categories = self.Categories = listing.PageList(self, namespace=14)
        self.images = self.Images = listing.PageList(self, namespace=6)

        # Flipped to True at the end of site_init()
        self.initialized = False

        if not do_init:
            return
        try:
            self.site_init()
        except errors.APIError as e:
            # Private wiki: initialisation has to wait until after login
            if e.args[0] not in (u'unknown_action', u'readapidenied'):
                raise

    def site_init(self):
        """Query site and user info and store it on the instance.

        Sets ``site``, ``namespaces``, ``writeapi``, ``version``,
        ``username``, ``groups``, ``rights`` and finally ``initialized``.

        Raises:
            errors.MediaWikiVersionError: if the generator string does not
                start with 'MediaWiki ' or yields fewer than two version
                components.
        """
        meta = self.api('query', meta='siteinfo|userinfo',
                        siprop='general|namespaces', uiprop='groups|rights', retry_on_error=False)
        query = meta['query']

        # Site info
        self.site = query['general']
        self.namespaces = dict(
            (ns['id'], ns.get('*', '')) for ns in six.itervalues(query['namespaces']))
        self.writeapi = 'writeapi' in self.site

        # Version, e.g. 'MediaWiki 1.25wmf1' -> (1, 25, 'wmf1')
        generator = self.site['generator']
        if not generator.startswith('MediaWiki '):
            raise errors.MediaWikiVersionError('Unknown generator %s' % generator)

        def split_num(s):
            # Split the leading ASCII digits from whatever follows them
            digit_count = len(s) - len(s.lstrip('0123456789'))
            number = int(s[:digit_count])
            suffix = s[digit_count:]
            if suffix:
                return (number, suffix, )
            return (number, )

        version = generator[10:].split('.')
        parsed = ()
        for component in version:
            parsed += split_num(component)
        self.version = parsed

        if len(self.version) < 2:
            raise errors.MediaWikiVersionError('Unknown MediaWiki %s' % '.'.join(version))

        # Require MediaWiki version >= 1.16
        self.require(1, 16)

        # Current user
        userinfo = query['userinfo']
        self.username = userinfo['name']
        self.groups = userinfo.get('groups', [])
        self.rights = userinfo.get('rights', [])
        self.initialized = True

    # Namespace id -> name mapping assigned to ``self.namespaces`` in
    # __init__; site_init() later replaces it with the wiki's own mapping.
    default_namespaces = {
        0: u'',
        1: u'Talk',
        2: u'User',
        3: u'User talk',
        4: u'Project',
        5: u'Project talk',
        6: u'Image',
        7: u'Image talk',
        8: u'MediaWiki',
        9: u'MediaWiki talk',
        10: u'Template',
        11: u'Template talk',
        12: u'Help',
        13: u'Help talk',
        14: u'Category',
        15: u'Category talk',
        -1: u'Special',
        -2: u'Media',
    }

    def __repr__(self):
        return "<Site object '%s%s'>" % (self.host, self.path)

    def api(self, action, *args, **kwargs):
        """
        Perform a generic API call and handle errors. All arguments will be passed on.

        Positional arguments are ``(key, value)`` pairs merged into the
        keyword arguments. An empty ``continue`` parameter is added when
        missing, and for ``query`` actions ``userinfo`` is appended to
        ``meta`` and ``blockinfo|hasmsg`` to ``uiprop``. The call is repeated
        until ``handle_api_result`` accepts the response.

        Example:
            To get coordinates from the GeoData MediaWiki extension at English Wikipedia:

            >>> site = Site('en.wikipedia.org')
            >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen')
            >>> for page in result['query']['pages'].values():
            ...     if 'coordinates' in page:
            ...         print(page['title'], page['coordinates'][0]['lat'], page['coordinates'][0]['lon'])
            Oslo 59.95 10.75
            Copenhagen 55.6761 12.5683

        Returns:
            The raw response from the API call, as a dictionary.
        """
        kwargs.update(args)
        kwargs.setdefault('continue', '')

        if action == 'query':
            # Always piggyback the user state that handle_api_result reads
            for name, extra in (('meta', 'userinfo'),
                                ('uiprop', 'blockinfo|hasmsg')):
                if name in kwargs:
                    kwargs[name] += '|' + extra
                else:
                    kwargs[name] = extra

        sleeper = self.sleepers.make()
        while True:
            info = self.raw_api(action, **kwargs) or {}
            if self.handle_api_result(info, sleeper=sleeper):
                return info

    def handle_api_result(self, info, kwargs=None, sleeper=None):
        """Update user state from an API response and check it for errors.

        ``blocked``, ``hasmsg`` and ``logged_in`` are refreshed from
        ``info['query']['userinfo']`` (treated as empty when missing).

        Args:
            info (dict): Decoded API response.
            kwargs: Attached to the raised APIError when the error carries
                no ``*`` entry.
            sleeper: Sleeper used before a retry; a new one is made when None.

        Returns:
            True when the response has no error, False after sleeping on a
            database connection/query error (the caller should retry).

        Raises:
            errors.APIError: for any other error in the response.
        """
        if sleeper is None:
            sleeper = self.sleepers.make()

        try:
            userinfo = info['query']['userinfo']
        except KeyError:
            userinfo = ()

        is_blocked = 'blockedby' in userinfo
        self.blocked = (
            (userinfo['blockedby'], userinfo.get('blockreason', u''))
            if is_blocked else False)
        self.hasmsg = 'messages' in userinfo
        self.logged_in = 'anon' not in userinfo

        if 'error' not in info:
            return True

        retryable = (u'internal_api_error_DBConnectionError',
                     u'internal_api_error_DBQueryError')
        if info['error']['code'] in retryable:
            sleeper.sleep()
            return False

        # Prefer the error's own '*' payload over the request arguments
        if '*' in info['error']:
            detail = info['error']['*']
        else:
            detail = kwargs
        raise errors.APIError(info['error']['code'],
                              info['error']['info'], detail)

    @staticmethod
    def _query_string(*args, **kwargs):
        """
        Merge positional ``(key, value)`` pairs and keyword arguments into one
        ordered mapping, with any ``wpEditToken`` / ``token`` entries placed last.

        Returns:
            OrderedDict: the merged parameters.
        """
        kwargs.update(args)
        token_keys = ('wpEditToken', 'token')
        regular, trailing = [], []
        for key, value in six.iteritems(kwargs):
            bucket = trailing if key in token_keys else regular
            bucket.append((key, value))
        return OrderedDict(regular + trailing)

    def raw_call(self, script, data, files=None, retry_on_error=True):
        """
        Perform a generic API call and return the raw text.

        In the event of a network problem, or a HTTP response with status code 5XX,
        we'll wait and retry the configured number of times before giving up
        if `retry_on_error` is True.

        A response carrying an ``x-database-lag`` header is always retried,
        waiting for the number of seconds given in ``retry-after`` when that
        header holds an integer, and for the sleeper's default interval otherwise.

        `requests.exceptions.HTTPError` is still raised directly for
        HTTP responses with status codes in the 4XX range, and invalid
        HTTP responses.

        Args:
            script (str): Script name, usually 'api'.
            data (dict): Post data
            files (dict): Files to upload
            retry_on_error (bool): Retry on connection error

        Returns:
            The raw text response.
        """
        url = self.path + script + self.ext
        headers = {}
        if self.compress and gzip:
            headers['Accept-Encoding'] = 'gzip'
        sleeper = self.sleepers.make((script, data))
        while True:
            scheme = 'http'  # Should we move to 'https' as default?
            host = self.host
            if isinstance(host, (list, tuple)):
                # A (scheme, host) pair overrides the default scheme
                scheme, host = host

            fullurl = '{scheme}://{host}{url}'.format(scheme=scheme, host=host, url=url)

            try:
                stream = self.connection.post(fullurl, data=data, files=files, headers=headers)
                if stream.headers.get('x-database-lag'):
                    try:
                        wait_time = int(stream.headers.get('retry-after'))
                    except (TypeError, ValueError):
                        # Header absent (None) or not a plain integer: don't crash,
                        # fall back to the sleeper's own back-off below.
                        wait_time = None
                    if wait_time is None:
                        log.warning('Database lag exceeds max lag. Retrying in a moment.')
                        sleeper.sleep()
                    else:
                        log.warning('Database lag exceeds max lag. Waiting for %d seconds', wait_time)
                        sleeper.sleep(wait_time)
                elif stream.status_code == 200:
                    return stream.text
                elif stream.status_code < 500 or stream.status_code > 599:
                    # NOTE(review): for a non-200 status that raise_for_status()
                    # does not treat as an error, the loop re-posts immediately
                    # without sleeping — confirm whether that is intended.
                    stream.raise_for_status()
                else:
                    if not retry_on_error:
                        stream.raise_for_status()
                    log.warning('Received %s response: %s. Retrying in a moment.', stream.status_code, stream.text)
                    sleeper.sleep()

            except requests.exceptions.ConnectionError:
                # In the event of a network problem (e.g. DNS failure, refused connection, etc),
                # Requests will raise a ConnectionError exception.
                if not retry_on_error:
                    raise
                log.warning('Connection error. Retrying in a moment.')
                sleeper.sleep()

    def raw_api(self, action, *args, **kwargs):
        """
        Send a call to the API and decode the JSON response.

        ``retry_on_error`` (default True) is removed from `kwargs` and handed to
        `raw_call`; everything else is posted together with ``action`` and
        ``format='json'``.

        Raises:
            errors.APIDisabledError: if the non-JSON response starts with the
                "MediaWiki API is not enabled for this site." message.
            errors.InvalidResponse: for any other response that is not valid JSON.
        """
        retry_on_error = kwargs.pop('retry_on_error', True)
        kwargs.update(action=action, format='json')
        payload = self._query_string(*args, **kwargs)
        res = self.raw_call('api', payload, retry_on_error=retry_on_error)

        try:
            return json.loads(res)
        except ValueError:
            if not res.startswith('MediaWiki API is not enabled for this site.'):
                raise errors.InvalidResponse(res)
            raise errors.APIDisabledError

    def raw_index(self, action, *args, **kwargs):
        """
        Send a call to index.php rather than the API.

        ``action`` and ``maxlag`` (taken from ``self.max_lag``) are added to the
        posted parameters. Returns whatever `raw_call` returns.
        """
        kwargs.update(action=action, maxlag=self.max_lag)
        return self.raw_call('index', self._query_string(*args, **kwargs))

    def require(self, major, minor, revision=None, raise_error=True):
        """
        Check that the site runs at least version `major`.`minor`.

        Args:
            major (int): Required major version.
            minor (int): Required minor version.
            revision: Not supported; any value other than None raises
                NotImplementedError.
            raise_error: If truthy, an unmet requirement raises instead of
                returning False. If None, an uninitialized site makes the
                method return None instead of raising.

        Returns:
            True if the requirement is met, False if it is not and
            `raise_error` is falsy.

        Raises:
            RuntimeError: if ``self.version`` is None and `raise_error` is not None.
            errors.MediaWikiVersionError: if the requirement is not met and
                `raise_error` is truthy.
            NotImplementedError: if `revision` is given.
        """
        if self.version is None:
            if raise_error is None:
                return
            raise RuntimeError('Site %s has not yet been initialized' % repr(self))

        if revision is not None:
            raise NotImplementedError

        if self.version[:2] >= (major, minor):
            return True
        if not raise_error:
            return False
        raise errors.MediaWikiVersionError('Requires version %s.%s, current version is %s.%s'
                                           % ((major, minor) + self.version[:2]))

    # Actions
    def email(self, user, text, subject, cc=False):
        """
        Send email to a specified user on the wiki.

            >>> try:
            ...     site.email('SomeUser', 'Some message', 'Some subject')
            ... except mwclient.errors.NoSpecifiedEmail as e:
            ...     print('The user does not accept email, or has not specified an email address.')

        Args:
            user (str): User name of the recipient
            text (str): Body of the email
            subject (str): Subject of the email
            cc (bool): True to send a copy of the email to yourself (default is False)

        Returns:
            Dictionary of the JSON response

        Raises:
            NoSpecifiedEmail (mwclient.errors.NoSpecifiedEmail): if the API reports
                the ``noemail`` error code for the recipient
            EmailError (mwclient.errors.EmailError): on other errors
        """

        token = self.get_token('email')

        try:
            info = self.api('emailuser', target=user, subject=subject,
                            text=text, ccme=cc, token=token)
        except errors.APIError as e:
            if e.args[0] == u'noemail':
                raise errors.NoSpecifiedEmail(user, e.args[1])
            # Exception instances are not iterable on Python 3, so the
            # arguments must be unpacked from `e.args`, not from `e` itself.
            raise errors.EmailError(*e.args)

        return info

    def login(self, username=None, password=None, cookies=None, domain=None):
        """
        Login to the wiki.

        Credentials are stored on the instance when both `username` and
        `password` are given; `cookies`, if given, are merged into the
        connection's cookie jar. If credentials are available, the ``login``
        API action is repeated until it reports ``Success``, feeding back the
        token on ``NeedToken`` and sleeping on ``Throttled``.

        Afterwards the user name, groups and rights are refreshed and the
        token cache is cleared if the site is already initialized; otherwise
        ``site_init()`` is run.

        Raises:
            errors.LoginError: if the API reports any other login result.
        """
        if username and password:
            self.credentials = (username, password, domain)
        if cookies:
            self.connection.cookies.update(cookies)

        if self.credentials:
            sleeper = self.sleepers.make()
            creds = self.credentials
            kwargs = {'lgname': creds[0], 'lgpassword': creds[1]}
            if creds[2]:
                kwargs['lgdomain'] = creds[2]

            while True:
                reply = self.api('login', **kwargs)['login']
                outcome = reply['result']
                if outcome == 'Success':
                    break
                if outcome == 'NeedToken':
                    # Repeat the request with the token the server handed out
                    kwargs['lgtoken'] = reply['token']
                elif outcome == 'Throttled':
                    sleeper.sleep(int(reply.get('wait', 5)))
                else:
                    raise errors.LoginError(self, reply)

        if not self.initialized:
            self.site_init()
            return

        response = self.api('query', meta='userinfo', uiprop='groups|rights')
        userinfo = response['query']['userinfo']
        self.username = userinfo['name']
        self.groups = userinfo.get('groups', [])
        self.rights = userinfo.get('rights', [])
        # Forget cached tokens now that the session may belong to another user
        self.tokens = {}

    def get_token(self, type, force=False, title=None):
        """
        Return a token of the given type, fetching it from the API if needed.

        Tokens are cached in ``self.tokens``; a cached value of ``'0'`` counts
        as "not fetched yet".

        Args:
            type (str): Token type. On MediaWiki 1.24 and later every type
                except 'watch', 'patrol', 'rollback' and 'userrights' is
                mapped to 'csrf'.
            force (bool): Fetch a new token even if one is cached.
            title (str): Page title used for the token request on versions
                before 1.24 (defaults to 'Test').

        Returns:
            The cached or newly fetched token.
        """
        has_csrf = self.version[:2] >= (1, 24)

        # The 'csrf' (cross-site request forgery) token introduced in 1.24 replaces
        # the majority of older tokens, like edittoken and movetoken.
        if has_csrf and type not in ['watch', 'patrol', 'rollback', 'userrights']:
            type = 'csrf'

        self.tokens.setdefault(type, '0')

        if force or self.tokens.get(type, '0') == '0':
            if has_csrf:
                response = self.api('query', meta='tokens', type=type)
                self.tokens[type] = response['query']['tokens']['%stoken' % type]
            else:
                if title is None:
                    # Some dummy title was needed to get a token prior to 1.24
                    title = 'Test'
                response = self.api('query', titles=title,
                                    prop='info', intoken=type)
                for page_info in six.itervalues(response['query']['pages']):
                    if page_info['title'] == title:
                        self.tokens[type] = page_info['%stoken' % type]

        return self.tokens[type]

    def upload(self, file=None, filename=None, description='', ignore=False, file_size=None,
               url=None, filekey=None, comment=None):
        """
        Uploads a file to the site. Returns JSON result from the API.
        Can raise `errors.InsufficientPermission` and `requests.exceptions.HTTPError`.

        Note that one of `file`, `filekey` and `url` must be specified, but not more
        than one. For normal uploads, you specify `file`.

        Example:

            >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg',
            ...               description='Some description')

        Args:
            file: File object or stream to upload.
            filename (str): Destination filename, don't include namespace
                prefix like 'File:'
            description (str): Wikitext for the file description page.
            ignore (bool): True to upload despite any warnings.
            file_size: Deprecated in mwclient 0.7; only triggers a
                DeprecationWarning.
            url (str): URL to fetch the file from.
            filekey (str): Key that identifies a previous upload that was
                stashed temporarily.
            comment (str): Upload comment. Also used as the initial page text
                for new files if `description` is not specified.

        Returns:
            The ``upload`` member of the decoded API response, or an empty
            dict if there is none.

        Raises:
            TypeError: if `filename` is missing, or if not exactly one of
                `file`, `filekey` and `url` is given.
            errors.InsufficientPermission: if the image reports that the
                'upload' action is not allowed.
        """
        if file_size is not None:
            # Note that DeprecationWarning is hidden by default since Python 2.7
            warnings.warn('file_size is deprecated since mwclient 0.7', DeprecationWarning)

        if filename is None:
            raise TypeError('filename must be specified')

        sources_given = sum(1 for source in (file, filekey, url) if source is not None)
        if sources_given != 1:
            raise TypeError("exactly one of 'file', 'filekey' and 'url' must be specified")

        image = self.Images[filename]
        if not image.can('upload'):
            raise errors.InsufficientPermission(filename)

        if comment is None:
            predata = {'comment': description}
        else:
            predata = {'comment': comment, 'text': description}

        if ignore:
            predata['ignorewarnings'] = 'true'
        predata['token'] = image.get_token('edit')
        predata.update(action='upload', format='json', filename=filename)
        if url:
            predata['url'] = url

        # Renamed from sessionkey to filekey
        # https://git.wikimedia.org/commit/mediawiki%2Fcore.git/5f13517e
        key_param = 'sessionkey' if self.version[:2] < (1, 18) else 'filekey'
        predata[key_param] = filekey

        files = None
        if file is not None:
            # Workaround for https://github.com/mwclient/mwclient/issues/65
            # Since the filename in Content-Disposition is not interpreted,
            # we can send some ascii-only dummy name rather than the real
            # filename, which might contain non-ascii.
            files = {'file': ('fake-filename', file)}

        sleeper = self.sleepers.make()
        while True:
            raw = self.raw_call('api', predata, files)
            info = json.loads(raw) or {}
            if self.handle_api_result(info, kwargs=predata, sleeper=sleeper):
                return info.get('upload', {})

    def parse(self, text=None, title=None, page=None):
        """
        Call the ``parse`` API action and return its ``parse`` result.

        Only the arguments that are not None are sent to the API.
        """
        candidates = (('text', text), ('title', title), ('page', page))
        kwargs = {name: value for name, value in candidates if value is not None}
        return self.api('parse', **kwargs)['parse']

    # def block(self): TODO?
    # def unblock: TODO?
    # def patrol: TODO?
    # def import: TODO?

    # Lists
    def allpages(self, start=None, prefix=None, namespace='0', filterredir='all',
                 minsize=None, maxsize=None, prtype=None, prlevel=None,
                 limit=None, dir='ascending', filterlanglinks='all', generator=True):
        """
        Retrieve all pages on the wiki as a generator.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the parameter prefix that ``listing.List.get_prefix('ap', generator)``
        selects; `start` is sent under the name ``from``. The list class is
        chosen by ``listing.List.get_list(generator)``.
        """
        param_prefix = listing.List.get_prefix('ap', generator)
        params = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), prefix=prefix,
            minsize=minsize, maxsize=maxsize, prtype=prtype, prlevel=prlevel,
            namespace=namespace, filterredir=filterredir, dir=dir,
            filterlanglinks=filterlanglinks))
        list_cls = listing.List.get_list(generator)
        return list_cls(self, 'allpages', 'ap', limit=limit, return_values='title', **params)

    def allimages(self, start=None, prefix=None, minsize=None, maxsize=None, limit=None,
                  dir='ascending', sha1=None, sha1base36=None, prop='timestamp|url',
                  generator=True):
        """
        Retrieve all images on the wiki as a generator.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the parameter prefix that ``listing.List.get_prefix('ai', generator)``
        selects; `start` is sent under the name ``from``.

        NOTE(review): `prop` is accepted but not used by this method; the
        listing is always created with ``return_values='timestamp|url'``.
        """
        param_prefix = listing.List.get_prefix('ai', generator)
        params = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), prefix=prefix,
            minsize=minsize, maxsize=maxsize,
            dir=dir, sha1=sha1, sha1base36=sha1base36))
        list_cls = listing.List.get_list(generator)
        return list_cls(self, 'allimages', 'ai', limit=limit,
                        return_values='timestamp|url', **params)

    def alllinks(self, start=None, prefix=None, unique=False, prop='title',
                 namespace='0', limit=None, generator=True):
        """
        Retrieve a list of all links on the wiki as a generator.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the parameter prefix that ``listing.List.get_prefix('al', generator)``
        selects; `start` is sent under the name ``from``. A truthy `unique`
        adds the prefixed ``unique`` parameter with the value ``'1'``.
        """
        param_prefix = listing.List.get_prefix('al', generator)
        params = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), prefix=prefix,
            prop=prop, namespace=namespace))
        if unique:
            params[param_prefix + 'unique'] = '1'
        list_cls = listing.List.get_list(generator)
        return list_cls(self, 'alllinks', 'al', limit=limit, return_values='title', **params)

    def allcategories(self, start=None, prefix=None, dir='ascending', limit=None, generator=True):
        """
        Retrieve all categories on the wiki as a generator.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the parameter prefix that ``listing.List.get_prefix('ac', generator)``
        selects; `start` is sent under the name ``from``.
        """
        param_prefix = listing.List.get_prefix('ac', generator)
        params = dict(listing.List.generate_kwargs(
            param_prefix, ('from', start), prefix=prefix, dir=dir))
        list_cls = listing.List.get_list(generator)
        return list_cls(self, 'allcategories', 'ac', limit=limit, **params)

    def allusers(self, start=None, prefix=None, group=None, prop=None, limit=None,
                 witheditsonly=False, activeusers=False, rights=None):
        """
        Retrieve all users on the wiki as a generator.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the ``'au'`` prefix; `start` is sent under the name ``from``.
        """
        params = dict(listing.List.generate_kwargs(
            'au', ('from', start), prefix=prefix,
            group=group, prop=prop,
            rights=rights,
            witheditsonly=witheditsonly,
            activeusers=activeusers))
        return listing.List(self, 'allusers', 'au', limit=limit, **params)

    def blocks(self, start=None, end=None, dir='older', ids=None, users=None, limit=None,
               prop='id|user|by|timestamp|expiry|reason|flags'):
        """Retrieve blocks as a generator.

        The filter arguments (`start`, `end`, `dir`, `ids`, `users`, `prop`)
        are run through ``listing.List.generate_kwargs`` with the ``'bk'`` prefix.

        Each block is a dictionary containing:
        - user: the username or IP address of the user
        - id: the ID of the block
        - timestamp: when the block was added
        - expiry: when the block runs out (infinity for indefinite blocks)
        - reason: the reason they are blocked
        - allowusertalk: key is present (empty string) if the user is allowed to edit their user talk page
        - by: the administrator who blocked the user
        - nocreate: key is present (empty string) if the user's ability to create accounts has been disabled.

        """

        # `ids` used to be accepted but silently dropped; forward it like the
        # other filters so callers can restrict the listing to given block IDs.
        kwargs = dict(listing.List.generate_kwargs('bk', start=start, end=end, dir=dir,
                                                   ids=ids, users=users, prop=prop))
        return listing.List(self, 'blocks', 'bk', limit=limit, **kwargs)

    def deletedrevisions(self, start=None, end=None, dir='older', namespace=None,
                         limit=None, prop='user|comment'):
        """
        Return a ``deletedrevs`` listing.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the ``'dr'`` prefix.
        """
        # TODO: Fix
        params = dict(listing.List.generate_kwargs(
            'dr', start=start, end=end, dir=dir,
            namespace=namespace, prop=prop))
        return listing.List(self, 'deletedrevs', 'dr', limit=limit, **params)

    def exturlusage(self, query, prop=None, protocol='http', namespace=None, limit=None):
        """Retrieves list of pages that link to a particular domain or URL as a generator.

        This API call mirrors the Special:LinkSearch function on-wiki.

        Query can be a domain like 'bbc.co.uk'. Wildcards can be used, e.g. '*.bbc.co.uk'.
        Alternatively, a query can contain a full domain name and some or all of a URL:
        e.g. '*.wikipedia.org/wiki/*'

        See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details.

        The generator returns dictionaries containing the following keys:
        - url: the URL linked to.
        - ns: namespace of the wiki page
        - pageid: the ID of the wiki page
        - title: the page title.

        """
        params = dict(listing.List.generate_kwargs(
            'eu', query=query, prop=prop,
            protocol=protocol, namespace=namespace))
        return listing.List(self, 'exturlusage', 'eu', limit=limit, **params)

    def logevents(self, type=None, prop=None, start=None, end=None,
                  dir='older', user=None, title=None, limit=None, action=None):
        """
        Return a ``logevents`` listing.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the ``'le'`` prefix.
        """
        params = dict(listing.List.generate_kwargs(
            'le', prop=prop, type=type, start=start,
            end=end, dir=dir, user=user, title=title, action=action))
        return listing.List(self, 'logevents', 'le', limit=limit, **params)

    def checkuserlog(self, user=None, target=None, limit=10, dir='older', start=None, end=None):
        """
        Return a ``checkuserlog`` listing as a ``listing.NestedList`` over ``'entries'``.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the ``'cul'`` prefix.
        """
        params = dict(listing.List.generate_kwargs(
            'cul', target=target, start=start,
            end=end, dir=dir, user=user))
        return listing.NestedList('entries', self, 'checkuserlog', 'cul', limit=limit, **params)

    # def protectedtitles requires 1.15
    def random(self, namespace, limit=20):
        """Retrieves a generator of random pages from a particular namespace.

        limit specifies the number of random articles retrieved.
        namespace is a namespace identifier integer.

        Generator contains dictionary with namespace, page ID and title.

        """
        params = dict(listing.List.generate_kwargs('rn', namespace=namespace))
        return listing.List(self, 'random', 'rn', limit=limit, **params)

    def recentchanges(self, start=None, end=None, dir='older', namespace=None,
                      prop=None, show=None, limit=None, type=None, toponly=None):
        """
        Return a ``recentchanges`` listing.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the ``'rc'`` prefix. A truthy `toponly` is sent as ``'1'``,
        anything else as None.
        """
        toponly_flag = '1' if toponly else None
        params = dict(listing.List.generate_kwargs(
            'rc', start=start, end=end, dir=dir,
            namespace=namespace, prop=prop, show=show, type=type,
            toponly=toponly_flag))
        return listing.List(self, 'recentchanges', 'rc', limit=limit, **params)

    def search(self, search, namespace='0', what=None, redirects=False, limit=None):
        """
        Perform a full text search.
        API doc: https://www.mediawiki.org/wiki/API:Search

            >>> for result in site.search('prefix:Template:Citation/'):
            ...     print(result.get('title'))

        Args:
            search (str): The query string
            namespace (int): The namespace to search (default: 0)
            what (str): Search scope: 'text' for fulltext, or 'title' for titles only.
                        Depending on the search backend, both options may not be available.
                        For instance
                        `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_
                        doesn't support 'title', but instead provides an "intitle:"
                        query string filter.
            redirects (bool): Include redirect pages in the search (option removed in MediaWiki 1.23).
            limit: Passed to the listing as its ``limit`` argument.

        Returns:
            listing.List: Search results iterator
        """
        params = dict(listing.List.generate_kwargs(
            'sr', search=search, namespace=namespace, what=what))
        if redirects:
            params['srredirects'] = '1'
        return listing.List(self, 'search', 'sr', limit=limit, **params)

    def usercontributions(self, user, start=None, end=None, dir='older', namespace=None,
                          prop=None, show=None, limit=None):
        """
        Return a ``usercontribs`` listing for `user`.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the ``'uc'`` prefix.
        """
        params = dict(listing.List.generate_kwargs(
            'uc', user=user, start=start, end=end,
            dir=dir, namespace=namespace, prop=prop, show=show))
        return listing.List(self, 'usercontribs', 'uc', limit=limit, **params)

    def users(self, users, prop='blockinfo|groups|editcount'):
        """
        Return a ``users`` listing for the given user names.

        Args:
            users: Iterable of user names; they are joined with ``'|'`` and
                sent as ``ususers``.
            prop (str): Sent as ``usprop``.
        """
        joined_names = '|'.join(users)
        return listing.List(self, 'users', 'us', ususers=joined_names, usprop=prop)

    def watchlist(self, allrev=False, start=None, end=None, namespace=None, dir='older',
                  prop=None, show=None, limit=None):
        """
        Return a ``watchlist`` listing.

        The filter arguments are run through ``listing.List.generate_kwargs``
        with the ``'wl'`` prefix. A truthy `allrev` adds ``wlallrev='1'``.
        """
        params = dict(listing.List.generate_kwargs(
            'wl', start=start, end=end,
            namespace=namespace, dir=dir, prop=prop, show=show))
        if allrev:
            params['wlallrev'] = '1'
        return listing.List(self, 'watchlist', 'wl', limit=limit, **params)

    def expandtemplates(self, text, title=None, generatexml=False):
        """
        Takes wikitext (text) and expands templates.

        Args:
            text (str): Wikitext to expand.
            title (str): Page title sent along with the request; omitted from
                the request when None.
            generatexml (bool): Also request and return the parse tree.

        Returns:
            The expanded wikitext, or a tuple ``(expanded wikitext, parse tree)``
            if `generatexml` is true.
        """

        kwargs = {}
        # Only send the title when the caller supplied one. (The test used to
        # be inverted, dropping a given title and sending title=None otherwise.)
        if title is not None:
            kwargs['title'] = title
        if generatexml:
            kwargs['generatexml'] = '1'

        result = self.api('expandtemplates', text=text, **kwargs)

        if generatexml:
            return result['expandtemplates']['*'], result['parsetree']['*']
        return result['expandtemplates']['*']

    def ask(self, query, title=None):
        """
        Ask a query against Semantic MediaWiki.

        Args:
            query (str): The query, sent as the ``query`` parameter of the
                ``ask`` action.
            title (str): Sent as the ``title`` parameter; omitted from the
                request when None.

        Returns:
            The ``results`` member of the response's ``query`` section.
        """
        kwargs = {}
        # Only send the title when the caller supplied one. (The test used to
        # be inverted, dropping a given title and sending title=None otherwise.)
        if title is not None:
            kwargs['title'] = title
        result = self.raw_api('ask', query=query, **kwargs)
        return result['query']['results']

    def embeddedin(self, title, prop='title', namespace=None,
                   limit=None):
        """Yield the pages that transclude (embed) a given page.

        API doc: https://www.mediawiki.org/wiki/API:Embeddedin

        Args:
            title (str): list pages that include this title.
            namespace (int): restricts search to a given namespace
            prop (str): prop list (separated by "|")
            limit (int): default amount of pages to return for each
                query

        Yields:
            A ``page.Page`` built from the ``'title'`` of each listing entry.
        """
        params = dict(listing.List.generate_kwargs(
            'ei', prop=prop, title=title, namespace=namespace, limit=limit))
        for entry in listing.List(self, 'embeddedin', 'ei', **params):
            yield page.Page(self, entry['title'])