Esempio n. 1
0
    def _parse(self, version_str: str) -> None:
        """Parse ``version_str`` and store its parts on this instance.

        The string is matched against ``MediaWikiVersion.MEDIAWIKI_VERSION``.
        On success three attributes are set:

        - ``version``: tuple of the dot-separated integers of group 1
        - ``suffix``: group 2 of the match, or ``''`` if it is empty
        - ``_dev_version``: tuple whose first element ranks the development
          stage (0 wmf, 1 alpha, 2 and 3 for the numbered stages of groups
          4 and 5, 4 for everything else)

        :param version_str: the version string to parse
        :raises ValueError: ``version_str`` does not match the pattern
        """
        match = MediaWikiVersion.MEDIAWIKI_VERSION.match(version_str)
        if not match:
            raise ValueError('Invalid version number "{}"'.format(version_str))

        numbers = tuple(int(part) for part in match.group(1).split('.'))

        # The _dev_version numbering scheme might change. E.g. if a stage
        # between 'alpha' and 'beta' is added, 'beta', 'rc' and stable releases
        # are reassigned (beta=3, rc=4, stable=5).

        # (group index, stage rank) pairs, tried in this order. Group 3 is
        # the wmf number; groups 4 and 5 are presumably the beta and rc
        # numbers -- confirm against the pattern.
        for group_index, rank in ((3, 0), (4, 2), (5, 3)):
            stage_number = match.group(group_index)
            if stage_number:
                dev_version = (rank, int(stage_number))
                break
        else:
            rest = match.group(2)
            if rest in ('alpha', '-alpha'):
                dev_version = (1, )
            else:
                for handled in ('wmf', 'alpha', 'beta', 'rc'):
                    # if any of those pops up here our parser has failed
                    assert handled not in rest, \
                        'Found "{}" in "{}"'.format(handled, rest)
                if rest:
                    debug(
                        'Additional unused version part '
                        '"{}"'.format(rest), _logger)
                dev_version = (4, )

        self._dev_version = dev_version
        self.suffix = match.group(2) or ''
        self.version = numbers
Esempio n. 2
0
 def stop_all(self):
     """Stop all threads in the pool.

     If the pool is not empty, the number of threads is written to the
     debug log first. Each thread is then stopped and the size of its
     queue is written to the debug log.
     """
     if self:
         debug('EARLY QUIT: Threads: {}'.format(len(self)), self._logger)

     queue_report = 'EARLY QUIT: Queue size left in {}: {}'
     for worker in self:
         worker.stop()
         debug(queue_report.format(worker, worker.queue.qsize()),
               self._logger)
Esempio n. 3
0
def _flush(stop: bool = True) -> None:
    """
    Wait for the put queue to drain, then drop the throttle entry.

    The page-putter thread is given time to work through its queue; after
    that this process is dropped from the throttle log. Called automatically
    at Python exit.

    @param stop: if true, a C{(None, [], {})} element is put on the queue
        before waiting and is left out of the reported page count
    """
    _logger = 'wiki'

    debug('_flush() called', _logger)

    def remaining() -> Tuple[int, datetime.timedelta]:
        """Return the number of queued pages and the estimated wait."""
        pages_left = page_put_queue.qsize()
        # -1 because we added a None element to stop the queue
        pages_left = pages_left - 1 if stop else pages_left
        eta = datetime.timedelta(
            seconds=round(pages_left * _config.put_throttle))
        return pages_left, eta

    def work_pending() -> bool:
        """Tell whether either put queue still holds elements."""
        return (page_put_queue.qsize() > 0
                or page_put_queue_busy.qsize() > 0)

    if stop:
        # None task element leaves async_manager
        page_put_queue.put((None, [], {}))

    num, sec = remaining()
    # Stay quiet unless the estimated wait exceeds the noisysleep setting.
    if num > 0 and sec.total_seconds() > _config.noisysleep:
        waiting_msg = color_format(
            '{lightblue}Waiting for {num} pages to be put. '
            'Estimated time remaining: {sec}{default}',
            num=num,
            sec=sec)
        output(waiting_msg)

    if _putthread is not threading.current_thread():
        while _putthread.is_alive() and work_pending():
            try:
                _putthread.join(1)
            except KeyboardInterrupt:
                question = (
                    'There are {} pages remaining in the queue. '
                    'Estimated time remaining: {}\nReally exit?'.format(
                        *remaining()))
                confirmed = input_yn(question,
                                     default=False,
                                     automatic_quit=False)
                if not confirmed:
                    continue

                # delete the put queue
                with page_put_queue.mutex:
                    page_put_queue.all_tasks_done.notify_all()
                    page_put_queue.queue.clear()
                    page_put_queue.not_full.notify_all()
                break

    # only need one drop() call because all throttles use the same global pid
    with suppress(IndexError):
        first_site = list(_sites.values())[0]
        first_site.throttle.drop()
        log('Dropped throttle(s).')
Esempio n. 4
0
    def append(self, thd):
        """Add a thread to the pool and start it.

        While ``self.active_count()`` is at or above ``self.limit`` the call
        sleeps ``self.wait_time`` seconds between checks before the thread
        is added.

        :param thd: the thread to add and start
        :raises TypeError: ``thd`` is not a ``threading.Thread`` instance
        """
        if not isinstance(thd, threading.Thread):
            message = "Cannot append '{}' to ThreadList".format(type(thd))
            raise TypeError(message)

        # Wait for a free slot before starting another thread.
        while self.active_count() >= self.limit:
            time.sleep(self.wait_time)

        super().append(thd)
        thd.start()

        position = len(self)
        debug("thread {} ('{}') started".format(position, type(thd)),
              self._logger)
Esempio n. 5
0
def _flush(stop=True):
    """
    Drop this process from the throttle log, after pending threads finish.

    Wait for the page-putter to flush its queue. Also drop this process from
    the throttle log. Called automatically at Python exit.

    @param stop: if true, put a C{(None, [], {})} element on the put queue
        before waiting; that element is left out of the reported page count
    @type stop: bool
    """
    _logger = "wiki"

    debug('_flush() called', _logger)

    def remaining():
        """Return (pages left in the queue, estimated time to put them)."""
        remainingPages = page_put_queue.qsize()
        if stop:
            # -1 because we added a None element to stop the queue
            remainingPages -= 1

        remainingSeconds = datetime.timedelta(seconds=(remainingPages *
                                                       config.put_throttle))
        return (remainingPages, remainingSeconds)

    if stop:
        # None task element leaves async_manager
        page_put_queue.put((None, [], {}))

    num, sec = remaining()
    # Only announce the wait if it is longer than config.noisysleep
    if num > 0 and sec.total_seconds() > config.noisysleep:
        output(
            color_format(
                '{lightblue}Waiting for {num} pages to be put. '
                'Estimated time remaining: {sec}{default}',
                num=num,
                sec=sec))

    # Thread.is_alive() is used instead of the camelCase alias isAlive(),
    # which was removed in Python 3.9.
    while _putthread.is_alive() and page_put_queue.qsize() > 0:
        try:
            # join with a timeout, then re-check the loop condition
            _putthread.join(1)
        except KeyboardInterrupt:
            if input_yn('There are {0} pages remaining in the queue. '
                        'Estimated time remaining: {1}\nReally exit?'
                        ''.format(*remaining()),
                        default=False,
                        automatic_quit=False):
                # NOTE: returning here skips the throttle drop below
                return

    # only need one drop() call because all throttles use the same global pid
    try:
        list(_sites.values())[0].throttle.drop()
        log(u"Dropped throttle(s).")
    except IndexError:
        # no site object is cached, so there is nothing to drop
        pass
Esempio n. 6
0
def stopme():
    """Drop this process from the throttle log, after pending threads finish.

    Can be called manually if desired, but if not, will be called automatically
    at Python exit.

    The waiting part runs only while the module-level C{stopped} flag is
    unset; the flag is set as soon as the stop element has been queued. The
    throttle drop at the end is attempted on every call.

    """
    global stopped
    _logger = "wiki"

    if not stopped:
        debug(u"stopme() called", _logger)

        def remaining():
            """Return (pages left in the queue, estimated time to put them)."""
            remainingPages = page_put_queue.qsize() - 1
            # -1 because we added a None element to stop the queue

            remainingSeconds = datetime.timedelta(
                seconds=(remainingPages * config.put_throttle))
            return (remainingPages, remainingSeconds)

        page_put_queue.put((None, [], {}))
        stopped = True

        # more than the stop element is queued: tell the user about the wait
        if page_put_queue.qsize() > 1:
            num, sec = remaining()
            output(
                color_format(
                    '{lightblue}Waiting for {num} pages to be put. '
                    'Estimated time remaining: {sec}{default}',
                    num=num,
                    sec=sec))

        # Thread.is_alive() is used instead of the camelCase alias
        # isAlive(), which was removed in Python 3.9.
        while _putthread.is_alive():
            try:
                # join with a timeout, then re-check the loop condition
                _putthread.join(1)
            except KeyboardInterrupt:
                if input_yn('There are %i pages remaining in the queue. '
                            'Estimated time remaining: %s\nReally exit?' %
                            remaining(),
                            default=False,
                            automatic_quit=False):
                    # NOTE: returning here skips the throttle drop below
                    return

    # only need one drop() call because all throttles use the same global pid
    try:
        list(_sites.values())[0].throttle.drop()
        log(u"Dropped throttle(s).")
    except IndexError:
        # no site object is cached, so there is nothing to drop
        pass
Esempio n. 7
0
def _flush(stop=True):
    """
    Drop this process from the throttle log, after pending threads finish.

    Wait for the page-putter to flush its queue. Also drop this process from the
    throttle log. Called automatically at Python exit.

    @param stop: if true, put a C{(None, [], {})} element on the put queue
        before waiting; that element is left out of the reported page count
    @type stop: bool
    """
    _logger = "wiki"

    debug('_flush() called', _logger)

    def remaining():
        """Return (pages left in the queue, estimated time to put them)."""
        remainingPages = page_put_queue.qsize()
        if stop:
            # -1 because we added a None element to stop the queue
            remainingPages -= 1

        remainingSeconds = datetime.timedelta(
            seconds=(remainingPages * config.put_throttle))
        return (remainingPages, remainingSeconds)

    if stop:
        # None task element leaves async_manager
        page_put_queue.put((None, [], {}))

    num, sec = remaining()
    # Only announce the wait if it is longer than config.noisysleep
    if num > 0 and sec.total_seconds() > config.noisysleep:
        output(color_format(
            '{lightblue}Waiting for {num} pages to be put. '
            'Estimated time remaining: {sec}{default}', num=num, sec=sec))

    # Thread.is_alive() is used instead of the camelCase alias isAlive(),
    # which was removed in Python 3.9.
    while _putthread.is_alive() and page_put_queue.qsize() > 0:
        try:
            # join with a timeout, then re-check the loop condition
            _putthread.join(1)
        except KeyboardInterrupt:
            if input_yn('There are {0} pages remaining in the queue. '
                        'Estimated time remaining: {1}\nReally exit?'
                        ''.format(*remaining()),
                        default=False, automatic_quit=False):
                # NOTE: returning here skips the throttle drop below
                return

    # only need one drop() call because all throttles use the same global pid
    try:
        list(_sites.values())[0].throttle.drop()
        log(u"Dropped throttle(s).")
    except IndexError:
        # no site object is cached, so there is nothing to drop
        pass
Esempio n. 8
0
def stopme():
    """
    Drop this process from the throttle log, after pending threads finish.

    Can be called manually if desired, but if not, will be called automatically
    at Python exit.

    The waiting part runs only while the module-level C{stopped} flag is
    unset; the flag is set as soon as the stop element has been queued. The
    throttle drop at the end is attempted on every call.
    """
    global stopped
    _logger = "wiki"

    if not stopped:
        debug(u"stopme() called", _logger)

        def remaining():
            """Return (pages left in the queue, estimated time to put them)."""
            remainingPages = page_put_queue.qsize() - 1
            # -1 because we added a None element to stop the queue

            remainingSeconds = datetime.timedelta(
                seconds=(remainingPages * config.put_throttle))
            return (remainingPages, remainingSeconds)

        page_put_queue.put((None, [], {}))
        stopped = True

        # more than the stop element is queued: tell the user about the wait
        if page_put_queue.qsize() > 1:
            num, sec = remaining()
            output(color_format(
                '{lightblue}Waiting for {num} pages to be put. '
                'Estimated time remaining: {sec}{default}', num=num, sec=sec))

        # Thread.is_alive() is used instead of the camelCase alias
        # isAlive(), which was removed in Python 3.9.
        while _putthread.is_alive():
            try:
                # join with a timeout, then re-check the loop condition
                _putthread.join(1)
            except KeyboardInterrupt:
                if input_yn('There are %i pages remaining in the queue. '
                            'Estimated time remaining: %s\nReally exit?'
                            % remaining(), default=False, automatic_quit=False):
                    # NOTE: returning here skips the throttle drop below
                    return

    # only need one drop() call because all throttles use the same global pid
    try:
        list(_sites.values())[0].throttle.drop()
        log(u"Dropped throttle(s).")
    except IndexError:
        # no site object is cached, so there is nothing to drop
        pass
Esempio n. 9
0
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """Return a cached Site object, creating it on first request.

    Without arguments the site is taken from the config settings; each
    setting may be overridden through the parameters. Created objects are
    cached under a key built from interface name, family, code and user
    and are reused by later calls.

    @param code: language code (override config.mylang)
    @type code: string
    @param fam: family name or object (override config.family)
    @type fam: string or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: unicode
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: unicode
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: URL of the wiki, given instead of code and fam. A family
        supporting that URL must still exist.
    @type url: string
    @rtype: pywikibot.site.APISite
    @raises ValueError: url was given together with code or fam
    @raises ValueError: importing the interface name failed
    @raises SiteDefinitionError: no family matches url

    """
    _logger = "wiki"

    # Either code and fam or only url
    if url and (code or fam):
        raise ValueError('URL to the wiki OR a pair of code and family name '
                         'should be provided')

    if not url:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family

        if not isinstance(fam, Family):
            fam = Family.load(fam)
    else:
        if url not in _url_cache:
            # Ask every configured family whether it applies to the URL
            candidates = []
            for family_file in config.family_files:
                family = Family.load(family_file)
                site_code = family.from_url(url)
                if site_code is not None:
                    candidates.append((site_code, family))

            if len(candidates) > 1:
                warning(
                    'Found multiple matches for URL "{0}": {1} (use first)'
                    .format(url, ', '.join(str(s) for s in candidates)))

            # A miss is cached as None.
            # TODO: As soon as AutoFamily is ready, try and use an
            #       AutoFamily
            _url_cache[url] = candidates[0] if candidates else None

        cached = _url_cache[url]
        if not cached:
            raise SiteDefinitionError("Unknown URL '{0}'.".format(url))
        code, fam = cached[0], cached[1]

    interface = interface or fam.interface(code)

    # config.usernames is initialised with a defaultdict for each family name
    family_name = str(fam)

    # Family-specific names take precedence over the '*' family entries
    usernames = config.usernames['*'].copy()
    usernames.update(config.usernames[family_name])
    user = user or usernames.get(code) or usernames.get('*')

    sysopnames = config.sysopnames['*'].copy()
    sysopnames.update(config.sysopnames[family_name])
    sysop = sysop or sysopnames.get(code) or sysopnames.get('*')

    if not isinstance(interface, type):
        # If it isnt a class, assume it is a string
        try:
            site_module = __import__('pywikibot.site', fromlist=[interface])
            interface = getattr(site_module, interface)
        except ImportError:
            raise ValueError('Invalid interface name: {0}'.format(interface))

    if not issubclass(interface, BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = normalize_username(user)
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        site = interface(code=code, fam=fam, user=user, sysop=sysop)
        _sites[key] = site
        debug(u"Instantiated %s object '%s'" % (interface.__name__, site),
              _logger)

        if site.code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (site, code), UserWarning, 2)

    return _sites[key]
Esempio n. 10
0
def Site(code: Optional[str] = None,
         fam=None,
         user: Optional[str] = None,
         *,
         interface=None,
         url: Optional[str] = None) -> Union[APISite, DataSite, ClosedSite]:
    """Return a cached Site object, creating it on first request.

    Without arguments the site is taken from the config settings; each
    setting may be overridden through the parameters. Created objects are
    cached under a key built from interface name, family, code and user
    and are reused by later calls.

    @param code: language code (override config.mylang)
        code may also be a sitename like 'wikipedia:test'
    @param fam: family name or object (override config.family)
    @type fam: str or pywikibot.family.Family
    @param user: bot user name to use on this site (override config.usernames)
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: URL of the wiki, given instead of code. A family supporting
        that URL must still exist.
    @raises ValueError: URL and code given
    @raises ValueError: sitename and family given
    @raises ValueError: Invalid interface name
    """
    _logger = 'wiki'

    if url:
        # Either code and fam or url with optional fam for AutoFamily name
        if code:
            raise ValueError(
                'URL to the wiki OR a pair of code and family name '
                'should be provided')
        code, fam = _code_fam_from_url(url, fam)
    elif not code or ':' not in code:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family
    elif fam:
        # code is a 'family:code' sitename, so fam must not be given too
        raise ValueError('sitename OR a pair of code and family name '
                         'should be provided')
    else:
        fam, _, code = code.partition(':')

    if not isinstance(fam, Family):
        fam = Family.load(fam)

    interface = interface or fam.interface(code)

    # config.usernames is initialised with a defaultdict for each family name
    family_name = str(fam)

    # T253127: usernames is a defaultdict
    if '*' in config.usernames:
        usernames = config.usernames['*'].copy()
    else:
        usernames = {}
    usernames.update(config.usernames[family_name])
    user = user or usernames.get(code) or usernames.get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        try:
            site_module = __import__('pywikibot.site', fromlist=[interface])
        except ImportError:
            raise ValueError('Invalid interface name: {0}'.format(interface))
        interface = getattr(site_module, interface)

    if not issubclass(interface, BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = normalize_username(user)
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        site = interface(code=code, fam=fam, user=user)
        _sites[key] = site
        debug("Instantiated %s object '%s'" % (interface.__name__, site),
              _logger)

        if site.code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (site, code), UserWarning, 2)

    return _sites[key]
Esempio n. 11
0
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """Return a cached Site object, creating it on first request.

    Without arguments the site is taken from the config settings; each
    setting may be overridden through the parameters. Created objects are
    cached under a key built from interface name, family, code and user
    and are reused by later calls.

    @param code: language code (override config.mylang)
    @type code: string
    @param fam: family name or object (override config.family)
    @type fam: string or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: unicode
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: unicode
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: URL of the wiki, given instead of code and fam. A family
        supporting that URL must still exist.
    @type url: string
    @rtype: pywikibot.site.APISite
    @raises ValueError: url was given together with code or fam
    @raises ValueError: importing the interface name failed
    @raises SiteDefinitionError: no family matches url

    """
    _logger = "wiki"

    # Either code and fam or only url
    if url and (code or fam):
        raise ValueError('URL to the wiki OR a pair of code and family name '
                         'should be provided')

    if not url:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family

        if not isinstance(fam, Family):
            fam = Family.load(fam)
    else:
        if url not in _url_cache:
            # Ask every configured family whether it applies to the URL
            candidates = []
            for family_file in config.family_files:
                family = Family.load(family_file)
                site_code = family.from_url(url)
                if site_code is not None:
                    candidates.append((site_code, family))

            if len(candidates) > 1:
                warning(
                    'Found multiple matches for URL "{0}": {1} (use first)'
                    .format(url, ', '.join(str(s) for s in candidates)))

            # A miss is cached as None.
            # TODO: As soon as AutoFamily is ready, try and use an
            #       AutoFamily
            _url_cache[url] = candidates[0] if candidates else None

        cached = _url_cache[url]
        if not cached:
            raise SiteDefinitionError("Unknown URL '{0}'.".format(url))
        code, fam = cached[0], cached[1]

    interface = interface or fam.interface(code)

    # config.usernames is initialised with a defaultdict for each family name
    family_name = str(fam)

    # Family-specific names take precedence over the '*' family entries
    usernames = config.usernames['*'].copy()
    usernames.update(config.usernames[family_name])
    user = user or usernames.get(code) or usernames.get('*')

    sysopnames = config.sysopnames['*'].copy()
    sysopnames.update(config.sysopnames[family_name])
    sysop = sysop or sysopnames.get(code) or sysopnames.get('*')

    if not isinstance(interface, type):
        # If it isnt a class, assume it is a string
        try:
            site_module = __import__('pywikibot.site', fromlist=[interface])
            interface = getattr(site_module, interface)
        except ImportError:
            raise ValueError('Invalid interface name: {0}'.format(interface))

    if not issubclass(interface, BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = normalize_username(user)
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        site = interface(code=code, fam=fam, user=user, sysop=sysop)
        _sites[key] = site
        debug(u"Instantiated %s object '%s'" % (interface.__name__, site),
              _logger)

        if site.code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (site, code), UserWarning, 2)

    return _sites[key]
Esempio n. 12
0
def intersect_generators(*iterables, allow_duplicates: bool = False):
    """Intersect generators listed in iterables.

    Yield items only if they are yielded by all generators of iterables.
    Threads (via ThreadedGenerator) are used in order to run generators
    in parallel, so that items can be yielded before generators are
    exhausted.

    Threads are stopped when they are either exhausted or Ctrl-C is pressed.
    Quitting before all generators are finished is attempted if
    there is no more chance of finding an item in all queues.

    Items must be hashable. Without ``allow_duplicates`` every yielded item
    is remembered and compared by equality, so that distinct items are
    never mistaken for each other even if their hash values collide.

    Sample:

    >>> iterables = 'mississippi', 'missouri'
    >>> list(intersect_generators(*iterables))
    ['m', 'i', 's']
    >>> list(intersect_generators(*iterables, allow_duplicates=True))
    ['m', 'i', 's', 's', 'i']

    :param iterables: page generators
    :param allow_duplicates: optional keyword argument to allow duplicates
        if present in all generators
    """
    # 'allow_duplicates' must be given as keyword argument
    if iterables and iterables[-1] in (True, False):
        allow_duplicates = iterables[-1]
        iterables = iterables[:-1]
        issue_deprecation_warning("'allow_duplicates' as positional argument",
                                  'keyword argument "allow_duplicates={}"'
                                  .format(allow_duplicates),
                                  since='6.4.0')

    # iterables must not be given as tuple or list
    if len(iterables) == 1 and isinstance(iterables[0], (list, tuple)):
        iterables = iterables[0]
        issue_deprecation_warning("'iterables' as list type",
                                  "consecutive iterables or use '*' to unpack",
                                  since='6.4.0')

    if not iterables:
        return

    # A single iterable is its own intersection; no threads are needed.
    if len(iterables) == 1:
        yield from iterables[0]
        return

    # If any generator is empty, no pages are going to be returned
    for source in iterables:
        if not source:
            debug('At least one generator ({!r}) is empty and execution was '
                  'skipped immediately.'.format(source), 'intersect')
            return

    # Item is cached to check that it is found n_gen
    # times before being yielded.
    cache = collections.defaultdict(collections.Counter)
    n_gen = len(iterables)

    # Class to keep track of alive threads.
    # Start new threads and remove completed threads.
    thrlist = ThreadList()

    for source in iterables:
        threaded_gen = ThreadedGenerator(name=repr(source), target=source)
        threaded_gen.daemon = True
        thrlist.append(threaded_gen)

    # One count per thread; subtracted from a cache entry after a yield.
    ones = collections.Counter(thrlist)
    # Items already yielded (only filled if duplicates are not allowed).
    # The items themselves are stored rather than their hash values:
    # distinct items may share a hash (in CPython hash(-1) == hash(-2)) and
    # would otherwise be dropped as duplicates.
    seen = set()

    while True:
        # Get items from queues in a round-robin way.
        for t in thrlist:
            try:
                # TODO: evaluate if True and timeout is necessary.
                item = t.queue.get(True, 0.1)

                if not allow_duplicates and item in seen:
                    continue

                # Cache entry is a Counter of ThreadedGenerator objects.
                cache[item].update([t])
                if len(cache[item]) == n_gen:
                    if allow_duplicates:
                        yield item
                        # Remove item from cache if possible.
                        if all(el == 1 for el in cache[item].values()):
                            cache.pop(item)
                        else:
                            cache[item] -= ones
                    else:
                        yield item
                        cache.pop(item)
                        seen.add(item)

                active = thrlist.active_count()
                max_cache = n_gen
                if cache.values():
                    max_cache = max(len(v) for v in cache.values())
                # No. of active threads is not enough to reach n_gen.
                # We can quit even if some thread is still active.
                # There could be an item in all generators which has not yet
                # appeared from any generator. Only when we have lost one
                # generator, then we can bail out early based on seen items.
                if active < n_gen and n_gen - max_cache > active:
                    thrlist.stop_all()
                    return
            except queue.Empty:
                # Nothing arrived from this thread within the timeout.
                pass
            except KeyboardInterrupt:
                thrlist.stop_all()
            # All threads are done.
            if thrlist.active_count() == 0:
                return
Esempio n. 13
0
    @mode_check_decorator
    def load(self, **kwargs):
        """Load cookies from file.

        Delegates to the parent class ``load()`` without arguments; the
        accepted ``kwargs`` are not forwarded to it.
        """
        super(PywikibotCookieJar, self).load()

    @mode_check_decorator
    def save(self, **kwargs):
        """Save cookies to file.

        Delegates to the parent class ``save()`` without arguments; the
        accepted ``kwargs`` are not forwarded to it.
        """
        super(PywikibotCookieJar, self).save()


# Module-level cookie jar; its file is the path config.datafilepath()
# returns for 'pywikibot.lwp'.
cookie_jar = PywikibotCookieJar(config.datafilepath('pywikibot.lwp'))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    # A failed load is not fatal here: it is only written to the debug log.
    debug('Loading cookies failed.', _logger)
else:
    debug('Loaded cookies from file.', _logger)

# Module-level requests session that uses the jar above for its cookies.
session = requests.Session()
session.cookies = cookie_jar


# Prepare flush on quit
def _flush():
    """Close the network session.

    If ``sys.last_type`` is present, which is taken to mean that we quit
    because of an exception, the exception type is printed and the closing
    message is logged as critical. Otherwise nothing is reported.
    """
    session.close()
    if not hasattr(sys, 'last_type'):
        return

    # we quit because of an exception
    print(sys.last_type)  # flake8: disable=T003 (print)
    critical('Closing network session.')
Esempio n. 14
0
def intersect_generators(*iterables, allow_duplicates: bool = False):
    """Generator of intersect iterables.

    Yield items only if they are yielded by all iterables. zip_longest
    is used to retrieve items from all iterables in parallel, so that
    items can be yielded before iterables are exhausted.

    Generator is stopped when all iterables are exhausted. Quitting
    before all iterables are finished is attempted if there is no more
    chance of finding an item in all of them.

    Items must be hashable. Without ``allow_duplicates`` every yielded
    item is remembered and compared by equality, so that distinct items
    are never mistaken for each other even if their hash values collide.

    Sample:

    >>> iterables = 'mississippi', 'missouri'
    >>> list(intersect_generators(*iterables))
    ['m', 'i', 's']
    >>> list(intersect_generators(*iterables, allow_duplicates=True))
    ['m', 'i', 's', 's', 'i']
    >>> list(intersect_generators([-1, -2], [-1, -2]))
    [-1, -2]


    .. versionadded:: 3.0

    .. versionchanged:: 5.0
       Avoid duplicates (T263947).

    .. versionchanged:: 6.4
       ``genlist`` was renamed to ``iterables``; consecutive iterables
       are to be used as iterables parameters or '*' to unpack a list

    .. deprecated:: 6.4
       ``allow_duplicates`` as positional argument,
       ``iterables`` as list type

    .. versionchanged:: 7.0
       Reimplemented without threads which is up to 10'000 times faster

    :param iterables: page generators
    :param allow_duplicates: optional keyword argument to allow duplicates
        if present in all generators
    """
    # 'allow_duplicates' must be given as keyword argument
    if iterables and iterables[-1] in (True, False):
        allow_duplicates = iterables[-1]
        iterables = iterables[:-1]
        issue_deprecation_warning(
            "'allow_duplicates' as positional argument",
            'keyword argument "allow_duplicates={}"'.format(allow_duplicates),
            since='6.4.0')

    # iterables must not be given as tuple or list
    if len(iterables) == 1 and isinstance(iterables[0], (list, tuple)):
        iterables = iterables[0]
        issue_deprecation_warning("'iterables' as list type",
                                  "consecutive iterables or use '*' to unpack",
                                  since='6.4.0')

    if not iterables:
        return

    # A single iterable is its own intersection.
    if len(iterables) == 1:
        yield from iterables[0]
        return

    # If any iterable is empty, no pages are going to be returned
    for source in iterables:
        if not source:
            debug(
                'At least one iterable ({!r}) is empty and execution was '
                'skipped immediately.'.format(source), 'intersect')
            return

    # Item is cached to check that it is found n_gen times
    # before being yielded.
    cache = collections.defaultdict(collections.Counter)
    n_gen = len(iterables)

    # One count per iterable index; subtracted from a cache entry after a
    # yield when duplicates are allowed.
    ones = collections.Counter(range(n_gen))
    active_iterables = set(range(n_gen))
    # Items already yielded (only filled if duplicates are not allowed).
    # The items themselves are stored rather than their hash values:
    # distinct items may share a hash (in CPython hash(-1) == hash(-2)) and
    # would otherwise be dropped as duplicates.
    seen = set()

    # Get items from iterables in a round-robin way.
    sentinel = object()
    for items in zip_longest(*iterables, fillvalue=sentinel):
        for index, item in enumerate(items):

            # The fill value marks an exhausted iterable.
            if item is sentinel:
                active_iterables.discard(index)
                continue

            if not allow_duplicates and item in seen:
                continue

            # Each cache entry is a Counter of iterables' index
            cache[item][index] += 1

            if len(cache[item]) == n_gen:
                yield item

                # Remove item from cache if possible or decrease Counter entry
                if not allow_duplicates:
                    del cache[item]
                    seen.add(item)
                elif cache[item] == ones:
                    del cache[item]
                else:
                    cache[item] -= ones

        # We can quit if an iterable is exceeded and cached iterables is
        # a subset of active iterables.
        if len(active_iterables) < n_gen:
            cached_iterables = set(
                chain.from_iterable(v.keys() for v in cache.values()))
            if cached_iterables <= active_iterables:
                return
Esempio n. 15
0
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: str
    @param fam: family name or object (override config.family)
    @type fam: str or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: str
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: str
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    @type url: str
    @rtype: pywikibot.site.APISite
    @raises ValueError: URL and pair of code and family given
    @raises ValueError: Invalid interface name (the module cannot be
        imported or it has no attribute of that name)
    @raises SiteDefinitionError: Unknown URL
    """
    _logger = 'wiki'

    if url:
        # Either code and fam or only url
        if code or fam:
            raise ValueError(
                'URL to the wiki OR a pair of code and family name '
                'should be provided')
        code, fam = _code_fam_from_url(url)
    else:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family

        if not isinstance(fam, Family):
            fam = Family.load(fam)

    interface = interface or fam.interface(code)

    # config.usernames is initialised with a defaultdict for each family name
    family_name = str(fam)

    # Entries of the specific family take precedence over the '*' family;
    # within the merged mapping the exact code wins over the '*' code.
    code_to_user = config.usernames['*'].copy()
    code_to_user.update(config.usernames[family_name])
    user = user or code_to_user.get(code) or code_to_user.get('*')

    code_to_sysop = config.sysopnames['*'].copy()
    code_to_sysop.update(config.sysopnames[family_name])
    sysop = sysop or code_to_sysop.get(code) or code_to_sysop.get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        if PY2:  # Must not be unicode in Python 2
            interface = str(interface)
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            # __import__ does not complain about fromlist names missing
            # from the module, so an unknown interface only shows up as an
            # AttributeError here; report it as the documented ValueError.
            interface = getattr(tmp, interface)
        except (ImportError, AttributeError):
            raise ValueError('Invalid interface name: {0}'.format(interface))

    if not issubclass(interface, BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = normalize_username(user)
    # The cache key is built from interface, family, code and user only;
    # sysop is not part of it.
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
        debug("Instantiated %s object '%s'"
              % (interface.__name__, _sites[key]), _logger)

        if _sites[key].code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (_sites[key], code), UserWarning, 2)

    return _sites[key]
Esempio n. 16
0
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: str
    @param fam: family name or object (override config.family)
    @type fam: str or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: str
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: str
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    @type url: str
    @rtype: pywikibot.site.APISite
    @raises ValueError: URL and pair of code and family given
    @raises ValueError: Invalid interface name (the module cannot be
        imported or it has no attribute of that name)
    @raises SiteDefinitionError: Unknown URL
    """
    _logger = 'wiki'

    if url:
        # Either code and fam or only url
        if code or fam:
            raise ValueError(
                'URL to the wiki OR a pair of code and family name '
                'should be provided')
        code, fam = _code_fam_from_url(url)
    else:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family

        if not isinstance(fam, Family):
            fam = Family.load(fam)

    interface = interface or fam.interface(code)

    # config.usernames is initialised with a defaultdict for each family name
    family_name = str(fam)

    # Start from the '*' family and let the specific family override it;
    # the lookup then prefers the exact code over the '*' code.
    code_to_user = config.usernames['*'].copy()
    code_to_user.update(config.usernames[family_name])
    user = user or code_to_user.get(code) or code_to_user.get('*')

    code_to_sysop = config.sysopnames['*'].copy()
    code_to_sysop.update(config.sysopnames[family_name])
    sysop = sysop or code_to_sysop.get(code) or code_to_sysop.get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        if PY2:  # Must not be unicode in Python 2
            interface = str(interface)
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            # A name that is missing from the module is not reported by
            # __import__; it surfaces as AttributeError on lookup, which
            # is translated into the documented ValueError as well.
            interface = getattr(tmp, interface)
        except (ImportError, AttributeError):
            raise ValueError('Invalid interface name: {0}'.format(interface))

    if not issubclass(interface, BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = normalize_username(user)
    # sysop is passed to the constructor but is not part of the cache key.
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
        debug("Instantiated %s object '%s'"
              % (interface.__name__, _sites[key]), _logger)

        if _sites[key].code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (_sites[key], code), UserWarning, 2)

    return _sites[key]
Esempio n. 17
0
    @mode_check_decorator
    def load(self, **kwargs):
        """Load cookies from file.

        Calls the parent class ``load()`` without arguments; keyword
        arguments given to this method are accepted but not forwarded.
        """
        # NOTE(review): kwargs are presumably only there for
        # mode_check_decorator -- confirm against its definition.
        super(PywikibotCookieJar, self).load()

    @mode_check_decorator
    def save(self, **kwargs):
        """Save cookies to file.

        Calls the parent class ``save()`` without arguments; keyword
        arguments given to this method are accepted but not forwarded.
        """
        # NOTE(review): kwargs are presumably only there for
        # mode_check_decorator -- confirm against its definition.
        super(PywikibotCookieJar, self).save()


# Module-level cookie jar; its file path for 'pywikibot.lwp' is resolved
# through config.datafilepath().
cookie_jar = PywikibotCookieJar(config.datafilepath('pywikibot.lwp'))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    # A failed load is not fatal: it is only reported at debug level and
    # execution continues with cookie_jar as it is at that point.
    debug('Loading cookies failed.', _logger)
else:
    debug('Loaded cookies from file.', _logger)

# Module-level requests session that uses cookie_jar as its cookie store.
session = requests.Session()
session.cookies = cookie_jar


# Prepare flush on quit
def _flush():
    """Close the module-level session and report an exit caused by an error.

    The session is always closed. If ``sys.last_type`` is set, it is
    printed and the closing message is emitted through ``critical``.
    """
    session.close()

    if not hasattr(sys, 'last_type'):
        return

    # we quit because of an exception
    print(sys.last_type)
    critical('Closing network session.')
Esempio n. 18
0
def Site(code: Optional[str] = None,
         fam: Union[str, 'Family', None] = None,
         user: Optional[str] = None,
         *,
         interface: Union[str, 'BaseSite', None] = None,
         url: Optional[str] = None) -> BaseSite:
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    Creating the default site using config.mylang and config.family::

        site = pywikibot.Site()

    Override default site code::

        site = pywikibot.Site('fr')

    Override default family::

        site = pywikibot.Site(fam='wikisource')

    Setting a specific site::

        site = pywikibot.Site('fr', 'wikisource')

    which is equal to::

        site = pywikibot.Site('wikisource:fr')

    :Note: An already created site is cached and a new variable points to
        the same object if interface, family, code and user are equal:

    >>> import pywikibot
    >>> site_1 = pywikibot.Site('wikisource:fr')
    >>> site_2 = pywikibot.Site('fr', 'wikisource')
    >>> site_1 is site_2
    True
    >>> site_1
    APISite("fr", "wikisource")

    ``APISite`` is the default interface. Refer :py:obj:`pywikibot.site` for
    other interface types.

    **Never create a site object via interface class directly.**
    Always use this factory method.

    :param code: language code (override config.mylang)
        code may also be a sitename like 'wikipedia:test'
    :param fam: family name or object (override config.family)
    :param user: bot user name to use on this site (override config.usernames)
    :param interface: site class or name of class in :py:obj:`pywikibot.site`
        (override config.site_interface)
    :param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    :raises ValueError: URL and pair of code and family given
    :raises ValueError: Invalid interface name (the module cannot be
        imported or it has no attribute of that name)
    :raises ValueError: Missing Site code
    :raises ValueError: Missing Site family
    """
    _logger = 'wiki'

    if url:
        # Either code and fam or url with optional fam for AutoFamily name
        if code:
            raise ValueError(
                'URL to the wiki OR a pair of code and family name '
                'should be provided')
        code, fam = _code_fam_from_url(url, fam)
    elif code and ':' in code:
        if fam:
            raise ValueError('sitename OR a pair of code and family name '
                             'should be provided')
        # A sitename has the form 'family:code'
        fam, _, code = code.partition(':')
    else:
        # Fallback to config defaults
        code = code or _config.mylang
        fam = fam or _config.family

    if not (code and fam):
        raise ValueError(
            'Missing Site {}'.format('code' if not code else 'family'))

    if not isinstance(fam, Family):
        fam = Family.load(fam)

    interface = interface or fam.interface(code)

    # config.usernames is initialised with a defaultdict for each family name
    family_name = str(fam)

    # Entries of the specific family take precedence over the '*' family;
    # within the merged mapping the exact code wins over the '*' code.
    code_to_user = {}
    if '*' in _config.usernames:  # T253127: usernames is a defaultdict
        code_to_user = _config.usernames['*'].copy()
    code_to_user.update(_config.usernames[family_name])
    user = user or code_to_user.get(code) or code_to_user.get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            # __import__ does not complain about fromlist names missing
            # from the module, so an unknown interface only shows up as an
            # AttributeError here; report it as the documented ValueError.
            interface = getattr(tmp, interface)
        except (ImportError, AttributeError) as err:
            raise ValueError(
                'Invalid interface name: {}'.format(interface)) from err

    if not issubclass(interface, BaseSite):
        warning('Site called with interface={}'.format(interface.__name__))

    user = normalize_username(user)
    key = '{}:{}:{}:{}'.format(interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        _sites[key] = interface(code=code, fam=fam, user=user)
        debug(
            "Instantiated {} object '{}'".format(interface.__name__,
                                                 _sites[key]), _logger)

        if _sites[key].code != code:
            warn(
                'Site {} instantiated using different code "{}"'.format(
                    _sites[key], code), UserWarning, 2)

    return _sites[key]