def __init__(self, wikihost, rchost, rcport=80, rcpath='/rc', total=None):
    """Set up the listener thread and its socket.IO client.

    Stores the connection parameters, creates the queue that received
    changes are put on, opens the socket.IO connection to rchost:rcport
    and registers the two event namespaces. The thread is flagged as a
    daemon and its ``running`` flag starts out False.

    @param wikihost: value emitted with the 'subscribe' command
    @param rchost: host handed to socketIO_client.SocketIO
    @param rcport: port handed to socketIO_client.SocketIO
    @param rcpath: namespace path the change listener is defined on
    @param total: number of accepted changes after which the thread is
        asked to stop; None disables the limit
    """
    super(RcListenerThread, self).__init__()
    self.wikihost = wikihost
    self.rchost = rchost
    self.rcport = rcport
    self.rcpath = rcpath
    self.total = total
    self.count = 0
    self.queue = Queue()
    self.warn_queue_length = 100
    self.running = False
    self.daemon = True

    import socketIO_client
    debug('Opening connection to %r' % self, _logger)
    self.client = socketIO_client.SocketIO(rchost, rcport)

    # The namespace classes below have their own ``self``; they reach
    # this thread through the closure variable.
    listener = self

    class RCListener(socketIO_client.BaseNamespace):

        def on_change(self, change):
            debug('Received change %r' % change, _logger)
            if not listener.running:
                debug('Thread in shutdown mode; ignoring change.', _logger)
                return

            listener.count += 1
            listener.queue.put(change)

            # Warn once per threshold, then raise the threshold by 100.
            limit = listener.warn_queue_length
            if listener.queue.qsize() > limit:
                warning('%r queue length exceeded %i' % (listener, limit),
                        _logger=_logger)
                listener.warn_queue_length = limit + 100

            if (listener.total is not None
                    and listener.count >= listener.total):
                listener.stop()

        def on_connect(self):
            host = listener.wikihost
            debug('Connected to %r; subscribing to %s' % (listener, host),
                  _logger)
            self.emit('subscribe', host)
            debug('Subscribed to %s' % host, _logger)

        def on_reconnect(self):
            debug('Reconnected to %r' % (listener,), _logger)
            self.on_connect()

    class GlobalListener(socketIO_client.BaseNamespace):

        def on_heartbeat(self):
            self._transport.send_heartbeat()

    self.client.define(RCListener, rcpath)
    self.client.define(GlobalListener)
def __init__(self, wikihost, rchost, rcport=80, rcpath='/rc', total=None):
    """Create the listener thread and connect its socket.IO client.

    Records the connection parameters on the instance, prepares the
    queue of received changes, connects to rchost:rcport and defines
    the change namespace on rcpath plus a global heartbeat namespace.
    The thread is a daemon; ``running`` is initialised to False.

    @param wikihost: value sent with the 'subscribe' command
    @param rchost: host passed to socketIO_client.SocketIO
    @param rcport: port passed to socketIO_client.SocketIO
    @param rcpath: namespace path for the change listener
    @param total: stop the thread once this many changes were accepted;
        None means unlimited
    """
    super(RcListenerThread, self).__init__()
    self.wikihost = wikihost
    self.rchost = rchost
    self.rcport = rcport
    self.rcpath = rcpath
    self.total = total
    self.count = 0
    self.queue = Queue()
    self.warn_queue_length = 100
    self.running = False
    self.daemon = True

    import socketIO_client
    debug('Opening connection to %r' % self, _logger)
    self.client = socketIO_client.SocketIO(rchost, rcport)

    # Closure handle used by the namespace callbacks, whose own
    # ``self`` is the namespace instance rather than this thread.
    owner = self

    class RCListener(socketIO_client.BaseNamespace):

        def on_change(self, change):
            debug('Received change %r' % change, _logger)
            if not owner.running:
                debug('Thread in shutdown mode; ignoring change.', _logger)
                return

            owner.count += 1
            owner.queue.put(change)

            # Each warning bumps the threshold so it is not repeated
            # for every further change.
            threshold = owner.warn_queue_length
            if owner.queue.qsize() > threshold:
                warning('%r queue length exceeded %i' % (owner, threshold),
                        _logger=_logger)
                owner.warn_queue_length = threshold + 100

            reached_total = (owner.total is not None
                             and owner.count >= owner.total)
            if reached_total:
                owner.stop()

        def on_connect(self):
            host = owner.wikihost
            debug(
                'Connected to %r; subscribing to %s' % (owner, host),
                _logger)
            self.emit('subscribe', host)
            debug('Subscribed to %s' % host, _logger)

        def on_reconnect(self):
            debug('Reconnected to %r' % (owner, ), _logger)
            self.on_connect()

    class GlobalListener(socketIO_client.BaseNamespace):

        def on_heartbeat(self):
            self._transport.send_heartbeat()

    self.client.define(RCListener, rcpath)
    self.client.define(GlobalListener)
def run(self):
    """Threaded function.

    Runs inside the thread when started with .start(). Pumps the
    socket.IO client in 0.1 second slices until ``self.running`` is
    cleared, then disconnects the client.

    A None sentinel is always put on ``self.queue`` when this method
    ends, including when the client raises, so that a consumer polling
    the queue is told the stream is over instead of waiting forever.
    The exception itself still propagates.
    """
    self.running = True
    try:
        while self.running:
            self.client.wait(seconds=0.1)
        debug('Shut down event loop for %r' % self, _logger)
        self.client.disconnect()
        debug('Disconnected %r' % self, _logger)
    finally:
        # End-of-stream marker; must be delivered on every exit path.
        self.queue.put(None)
def rc_listener(wikihost, rchost, rcport=80, rcpath='/rc', total=None):
    """RC Changes Generator. Yields changes received from RCstream.

    Starts an RcListenerThread and yields every element it puts on its
    queue until the thread queues None.

    @param wikihost: the hostname of the wiki we want to get changes for.
                     This is passed to rcstream using a 'subscribe' command.
                     Pass '*' to listen to all wikis for a given rc host.
    @param rchost: the recent changes stream host to connect to. For
                   Wikimedia wikis, this is 'stream.wikimedia.org'
    @param rcport: the port to connect to (default: 80)
    @param rcpath: the sockets.io path. For Wikimedia wikis, this is '/rc'.
                   (default: '/rc')
    @param total: the maximum number of entries to return. The underlying
                  thread is shut down when this number is reached.

    @yields dict: dict as formatted by MediaWiki's
        MachineReadableRCFeedFormatter[1], which consists of at least id
        (recent changes id), type ('edit', 'new', 'log' or 'external'),
        namespace, title, comment, timestamp, user and bot (bot flag for
        the change). See [1] for more details.

    @raises ImportError

    [1] https://github.com/wikimedia/mediawiki/blob/master/includes/rcfeed/MachineReadableRCFeedFormatter.php

    """
    # Probe for socketIO_client up front: an ImportError raised later in
    # the worker thread would not be easy to catch. The module itself is
    # not used here, hence the noqa for flake8.
    try:
        import socketIO_client  # noqa
    except ImportError:
        raise ImportError('socketIO_client is required for the rc stream; '
                          'install it with pip install socketIO_client')

    listener = RcListenerThread(
        wikihost=wikihost,
        rchost=rchost,
        rcport=rcport,
        rcpath=rcpath,
        total=total,
    )
    debug('Starting rcstream thread %r' % listener, _logger)
    listener.start()

    changes = listener.queue
    while True:
        try:
            item = changes.get(timeout=0.1)
        except Empty:
            # Nothing arrived within the poll interval; try again.
            continue
        if item is None:
            # None is the end-of-stream marker.
            break
        yield item
def rc_listener(wikihost, rchost, rcport=80, rcpath='/rc', total=None):
    """RC Changes Generator. Yields changes received from RCstream.

    A listener thread is started and its queue is polled; each queued
    element is yielded until a None element ends the stream.

    @param wikihost: the hostname of the wiki we want to get changes for.
                     This is passed to rcstream using a 'subscribe' command.
                     Pass '*' to listen to all wikis for a given rc host.
    @param rchost: the recent changes stream host to connect to. For
                   Wikimedia wikis, this is 'stream.wikimedia.org'
    @param rcport: the port to connect to (default: 80)
    @param rcpath: the sockets.io path. For Wikimedia wikis, this is '/rc'.
                   (default: '/rc')
    @param total: the maximum number of entries to return. The underlying
                  thread is shut down when this number is reached.

    @yields dict: dict as formatted by MediaWiki's
        MachineReadableRCFeedFormatter[1], which consists of at least id
        (recent changes id), type ('edit', 'new', 'log' or 'external'),
        namespace, title, comment, timestamp, user and bot (bot flag for
        the change). See [1] for more details.

    @raises ImportError

    [1] https://github.com/wikimedia/mediawiki/blob/master/includes/rcfeed/MachineReadableRCFeedFormatter.php

    """
    try:
        # Only checks that socketIO_client is installed; the error would
        # otherwise surface in the worker thread where it is not easily
        # caught. The import is unused, so flake8 is silenced with noqa.
        import socketIO_client  # noqa
    except ImportError:
        raise ImportError('socketIO_client is required for the rc stream; '
                          'install it with pip install socketIO_client')

    thread_args = dict(wikihost=wikihost, rchost=rchost, rcport=rcport,
                       rcpath=rcpath, total=total)
    worker = RcListenerThread(**thread_args)
    debug('Starting rcstream thread %r' % worker, _logger)
    worker.start()

    while True:
        try:
            entry = worker.queue.get(timeout=0.1)
        except Empty:
            continue
        if entry is None:
            # The worker signals the end of the stream with None.
            break
        yield entry
def stopme():
    """Drop this process from the throttle log, after pending threads finish.

    Can be called manually if desired, but if not, will be called
    automatically at Python exit.

    On the first call a stop element is queued for the put thread and
    the function waits for that thread to finish. A KeyboardInterrupt
    during the wait asks the user whether to really exit; answering yes
    returns immediately without dropping the throttle.
    """
    global stopped
    _logger = "wiki"

    if not stopped:
        debug("stopme() called", _logger)

        def remaining():
            # -1 because we added a None element to stop the queue
            remainingPages = page_put_queue.qsize() - 1
            remainingSeconds = datetime.timedelta(
                seconds=(remainingPages * config.put_throttle))
            return (remainingPages, remainingSeconds)

        page_put_queue.put((None, [], {}))
        stopped = True

        if page_put_queue.qsize() > 1:
            num, sec = remaining()
            format_values = dict(num=num, sec=sec)
            output(
                "\03{lightblue}"
                "Waiting for %(num)i pages to be put. "
                "Estimated time remaining: %(sec)s"
                "\03{default}" % format_values
            )

        # Thread.isAlive() was removed in Python 3.9; is_alive() is the
        # spelling available on every supported version.
        while _putthread.is_alive():
            try:
                _putthread.join(1)
            except KeyboardInterrupt:
                if input_yn(
                    "There are %i pages remaining in the queue. "
                    "Estimated time remaining: %s\nReally exit?" % remaining(),
                    default=False,
                    automatic_quit=False,
                ):
                    return

    # only need one drop() call because all throttles use the same global pid
    try:
        list(_sites.values())[0].throttle.drop()
        log("Dropped throttle(s).")
    except IndexError:
        # No site was ever created, so there is no throttle to drop.
        pass
def stopme():
    """Drop this process from the throttle log, after pending threads finish.

    Can be called manually if desired, but if not, will be called
    automatically at Python exit.

    On the first call a stop element is queued for the put thread and
    the function waits for that thread to finish. A KeyboardInterrupt
    during the wait asks the user whether to really exit; answering 'y'
    returns immediately without dropping the throttle.
    """
    global stopped
    _logger = "wiki"

    if not stopped:
        debug(u"stopme() called", _logger)

        def remaining():
            # -1 because we added a None element to stop the queue
            remainingPages = page_put_queue.qsize() - 1
            remainingSeconds = datetime.timedelta(
                seconds=(remainingPages * config.put_throttle))
            return (remainingPages, remainingSeconds)

        page_put_queue.put((None, [], {}))
        stopped = True

        if page_put_queue.qsize() > 1:
            num, sec = remaining()
            format_values = dict(num=num, sec=sec)
            output(u'\03{lightblue}'
                   u'Waiting for %(num)i pages to be put. '
                   u'Estimated time remaining: %(sec)s'
                   u'\03{default}' % format_values)

        # Thread.isAlive() was removed in Python 3.9; is_alive() is the
        # spelling available on every supported version.
        while _putthread.is_alive():
            try:
                _putthread.join(1)
            except KeyboardInterrupt:
                answer = inputChoice(
                    u"There are %i pages remaining in the queue. "
                    u"Estimated time remaining: %s\n"
                    u"Really exit?" % remaining(),
                    ['yes', 'no'], ['y', 'N'], 'N')
                if answer == 'y':
                    return

    # only need one drop() call because all throttles use the same global pid
    try:
        list(_sites.values())[0].throttle.drop()
        log(u"Dropped throttle(s).")
    except IndexError:
        # No site was ever created, so there is no throttle to drop.
        pass
def on_change(self, change):
    """Queue a received change on the listener thread.

    Changes are dropped while the thread is not running. A warning is
    logged when the queue outgrows the current threshold, which is then
    raised by 100. Once ``thread.total`` changes have been accepted the
    thread is asked to stop.
    """
    debug('Received change {0!r}'.format(change), _logger)
    if not thread.running:
        debug('Thread in shutdown mode; ignoring change.', _logger)
        return

    thread.count += 1
    thread.queue.put(change)

    threshold = thread.warn_queue_length
    if thread.queue.qsize() > threshold:
        message = '{0!r} queue length exceeded {1:d}'.format(thread,
                                                             threshold)
        warning(message, _logger=_logger)
        thread.warn_queue_length = threshold + 100

    limit_reached = (thread.total is not None
                     and thread.count >= thread.total)
    if limit_reached:
        thread.stop()
def rc_listener(wikihost, rchost, rcport=80, rcpath='/rc', total=None):
    """Yield changes received from RCstream.

    Starts an RcListenerThread and yields every element it puts on its
    queue until the thread queues None.

    @param wikihost: the hostname of the wiki we want to get changes for.
                     This is passed to rcstream using a 'subscribe' command.
                     Pass '*' to listen to all wikis for a given rc host.
    @param rchost: the recent changes stream host to connect to. For
                   Wikimedia wikis, this is 'https://stream.wikimedia.org'
    @param rcport: the port to connect to (default: 80)
    @param rcpath: the sockets.io path. For Wikimedia wikis, this is '/rc'.
                   (default: '/rc')
    @param total: the maximum number of entries to return. The underlying
                  thread is shut down when this number is reached.

    @return: yield dict as formatted by MediaWiki's
        MachineReadableRCFeedFormatter, which consists of at least id
        (recent changes id), type ('edit', 'new', 'log' or 'external'),
        namespace, title, comment, timestamp, user and bot (bot flag for
        the change).
    @see: U{MachineReadableRCFeedFormatter<https://doc.wikimedia.org/
        mediawiki-core/master/php/classMachineReadableRCFeedFormatter.html>}
    @rtype: generator
    @raises ImportError
    """
    # The name socketIO_client holds an exception instance instead of the
    # module when it could not be imported.
    if isinstance(socketIO_client, Exception):
        raise ImportError('socketIO_client is required for the rc stream;\n'
                          'install it with pip install "socketIO_client==0.5.6"')

    listener = RcListenerThread(
        wikihost=wikihost,
        rchost=rchost,
        rcport=rcport,
        rcpath=rcpath,
        total=total,
    )
    debug('Starting rcstream thread %r' % listener, _logger)
    listener.start()

    changes = listener.queue
    while True:
        try:
            item = changes.get(timeout=0.1)
        except Empty:
            # Nothing arrived within the poll interval; try again.
            continue
        if item is None:
            # None is the end-of-stream marker.
            break
        yield item
def on_change(self, change):
    """Put a received change on the listener thread's queue.

    Nothing is queued while the thread is not running. When the queue
    grows beyond the current warning threshold a warning is logged and
    the threshold is raised by 100. The thread is asked to stop as soon
    as ``thread.total`` changes have been accepted.
    """
    debug('Received change %r' % change, _logger)
    if not thread.running:
        debug('Thread in shutdown mode; ignoring change.', _logger)
        return

    thread.count += 1
    thread.queue.put(change)

    threshold = thread.warn_queue_length
    if thread.queue.qsize() > threshold:
        warning('%r queue length exceeded %i' % (thread, threshold),
                _logger=_logger)
        thread.warn_queue_length = threshold + 100

    if thread.total is None:
        return
    if thread.count >= thread.total:
        thread.stop()
def rc_listener(wikihost, rchost, rcport=80, rcpath='/rc', total=None):
    """Yield changes received from RCstream.

    A listener thread is started and its queue is polled; each queued
    element is yielded until a None element ends the stream.

    @param wikihost: the hostname of the wiki we want to get changes for.
                     This is passed to rcstream using a 'subscribe' command.
                     Pass '*' to listen to all wikis for a given rc host.
    @param rchost: the recent changes stream host to connect to. For
                   Wikimedia wikis, this is 'stream.wikimedia.org'
    @param rcport: the port to connect to (default: 80)
    @param rcpath: the sockets.io path. For Wikimedia wikis, this is '/rc'.
                   (default: '/rc')
    @param total: the maximum number of entries to return. The underlying
                  thread is shut down when this number is reached.

    @yields dict: dict as formatted by MediaWiki's
        MachineReadableRCFeedFormatter[1], which consists of at least id
        (recent changes id), type ('edit', 'new', 'log' or 'external'),
        namespace, title, comment, timestamp, user and bot (bot flag for
        the change). See [1] for more details.

    @raises ImportError

    [1]: See mediawiki/includes/rcfeed/MachineReadableRCFeedFormatter.php

    """
    # socketIO_client is bound to an exception instance, not the module,
    # when importing it failed.
    import_failed = isinstance(socketIO_client, Exception)
    if import_failed:
        raise ImportError(
            'socketIO_client is required for the rc stream;\n'
            'install it with pip install "socketIO_client==0.5.6"')

    thread_args = dict(wikihost=wikihost, rchost=rchost, rcport=rcport,
                       rcpath=rcpath, total=total)
    worker = RcListenerThread(**thread_args)
    debug('Starting rcstream thread %r' % worker, _logger)
    worker.start()

    while True:
        try:
            entry = worker.queue.get(timeout=0.1)
        except Empty:
            continue
        if entry is None:
            # The worker signals the end of the stream with None.
            break
        yield entry
def Site(code=None, fam=None, user=None, sysop=None, interface=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: string
    @param fam: family name or object (override config.family)
    @type fam: string or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: unicode
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: unicode
    @param interface: site interface (override config.site_interface)
    @type interface: string
    @return: the cached or newly created site object
    @raises ValueError: pywikibot.site cannot be imported or does not
        define a class named by interface
    """
    _logger = "wiki"

    # Fallback to config defaults
    code = code or config.mylang
    fam = fam or config.family
    interface = interface or config.site_interface

    # config.usernames is initialised with a dict for each family name
    family_name = str(fam)
    if family_name in config.usernames:
        user = user or config.usernames[family_name].get(code) \
            or config.usernames[family_name].get('*')
        sysop = sysop or config.sysopnames[family_name].get(code) \
            or config.sysopnames[family_name].get('*')

    try:
        tmp = __import__('pywikibot.site', fromlist=[interface])
        __Site = getattr(tmp, interface)
    except (ImportError, AttributeError):
        # __import__ does not fail for a name missing from the module;
        # the getattr does, with AttributeError.
        raise ValueError("Invalid interface name '%(interface)s'"
                         % {'interface': interface})

    key = '%s:%s:%s' % (fam, code, user)
    if key not in _sites or not isinstance(_sites[key], __Site):
        _sites[key] = __Site(code=code, fam=fam, user=user, sysop=sysop)
        debug(u"Instantiating Site object '%(site)s'"
              % {'site': _sites[key]}, _logger)
    return _sites[key]
def wrapper(*__args, **__kw):
    """Call the wrapped method after handling the deprecated keyword.

    If the old keyword was passed it is always removed before the call.
    When a replacement name exists and was not passed as well, the value
    is moved to it and a deprecation warning is issued; when both were
    passed only a warning is issued. Without a replacement name the
    deprecation is reported through debug().
    """
    meth_name = method.__name__
    if old_arg not in __kw:
        return method(*__args, **__kw)

    names = {'old_arg': old_arg, 'new_arg': new_arg, 'meth_name': meth_name}
    if not new_arg:
        debug(
            u"%(old_arg)s argument of %(meth_name)s is deprecated." % names,
            _logger)
    elif new_arg in __kw:
        warning(
            u"%(new_arg)s argument of %(meth_name)s replaces %(old_arg)s; cannot use both."
            % names)
    else:
        warning(
            u"%(old_arg)s argument of %(meth_name)s is deprecated; use %(new_arg)s instead."
            % names)
        __kw[new_arg] = __kw[old_arg]

    del __kw[old_arg]
    return method(*__args, **__kw)
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: string
    @param fam: family name or object (override config.family)
    @type fam: string or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: unicode
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: unicode
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    @type url: string
    @return: the cached or newly created site object
    @raises ValueError: url was combined with code or fam, or interface
        names no class in pywikibot.site
    @raises SiteDefinitionError: no configured family matches url
    """
    # Either code and fam or only url
    if url and (code or fam):
        raise ValueError('URL to the wiki OR a pair of code and family name '
                         'should be provided')
    _logger = "wiki"

    if url:
        if url not in _url_cache:
            matched_sites = []
            # Iterate through all families and look, which does apply to
            # the given URL
            for fam_name in config.family_files:
                family = pywikibot.family.Family.load(fam_name)
                matched_code = family.from_url(url)
                if matched_code is not None:
                    matched_sites.append((matched_code, fam_name))

            if matched_sites:
                if len(matched_sites) > 1:
                    pywikibot.warning(
                        'Found multiple matches for URL "{0}": {1} (use first)'
                        .format(url, ', '.join(str(s) for s in matched_sites)))
                _url_cache[url] = matched_sites[0]
            else:
                # TODO: As soon as AutoFamily is ready, try and use an
                #       AutoFamily
                _url_cache[url] = None

        cached = _url_cache[url]
        if cached:
            code = cached[0]
            fam = cached[1]
        else:
            raise SiteDefinitionError("Unknown URL '{0}'.".format(url))
    else:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family
    interface = interface or config.site_interface

    # config.usernames is initialised with a dict for each family name
    family_name = str(fam)
    if family_name in config.usernames:
        user = user or config.usernames[family_name].get(code) \
            or config.usernames[family_name].get('*')
        sysop = sysop or config.sysopnames[family_name].get(code) \
            or config.sysopnames[family_name].get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            interface = getattr(tmp, interface)
        except (ImportError, AttributeError):
            # __import__ does not fail for a name missing from the
            # module; the getattr does, with AttributeError.
            raise ValueError("Invalid interface name '%(interface)s'"
                             % {'interface': interface})

    if not issubclass(interface, pywikibot.site.BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = pywikibot.tools.normalize_username(user)
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
        debug(u"Instantiated %s object '%s'"
              % (interface.__name__, _sites[key]), _logger)

        if _sites[key].code != code:
            warn('Site %s instantiated using different code "%s"'
                 % (_sites[key], code), UserWarning, 2)

    return _sites[key]
def on_connect(self):
    """Subscribe to the listener thread's wiki host after connecting."""
    wikihost = thread.wikihost
    connected = 'Connected to {0!r}; subscribing to {1!s}'.format(thread,
                                                                  wikihost)
    debug(connected, _logger)
    self.emit('subscribe', wikihost)
    debug('Subscribed to {0!s}'.format(wikihost), _logger)
def on_reconnect(self):
    """Log the reconnect and subscribe again through on_connect()."""
    message = 'Reconnected to {0!r}'.format(thread)
    debug(message, _logger)
    self.on_connect()
def on_reconnect(self):
    """Log the reconnect, then repeat the on_connect() subscription."""
    message = 'Reconnected to %r' % (thread,)
    debug(message, _logger)
    self.on_connect()
def on_connect(self):
    """Emit 'subscribe' for the listener thread's wiki host."""
    wikihost = thread.wikihost
    connected = 'Connected to %r; subscribing to %s' % (thread, wikihost)
    debug(connected, _logger)
    self.emit('subscribe', wikihost)
    debug('Subscribed to %s' % wikihost, _logger)
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: string
    @param fam: family name or object (override config.family)
    @type fam: string or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: unicode
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: unicode
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    @type url: string
    @return: the cached or newly created site object
    @raises ValueError: url was combined with code or fam, or interface
        names no class in pywikibot.site
    @raises SiteDefinitionError: no configured family matches url
    """
    # Either code and fam or only url. Raised explicitly because an
    # assert would be stripped when Python runs with -O.
    if url and (code or fam):
        raise ValueError('URL to the wiki OR a pair of code and family name '
                         'should be provided')
    _logger = "wiki"

    if url:
        if url in _url_cache:
            cached = _url_cache[url]
            if cached:
                code = cached[0]
                fam = cached[1]
            else:
                raise SiteDefinitionError("Unknown URL '{0}'.".format(url))
        else:
            # Iterate through all families and look, which does apply to
            # the given URL
            for fam in config.family_files:
                try:
                    family = pywikibot.family.Family.load(fam)
                    code = family.from_url(url)
                    if code:
                        _url_cache[url] = (code, fam)
                        break
                except Exception as e:
                    # A broken family must not prevent trying the others.
                    pywikibot.warning('Error in Family(%s).from_url: %s'
                                      % (fam, e))
            else:
                _url_cache[url] = None
                # TODO: As soon as AutoFamily is ready, try and use an
                #       AutoFamily
                raise SiteDefinitionError("Unknown URL '{0}'.".format(url))
    else:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family
    interface = interface or config.site_interface

    # config.usernames is initialised with a dict for each family name
    family_name = str(fam)
    if family_name in config.usernames:
        user = user or config.usernames[family_name].get(code) \
            or config.usernames[family_name].get('*')
        sysop = sysop or config.sysopnames[family_name].get(code) \
            or config.sysopnames[family_name].get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            interface = getattr(tmp, interface)
        except (ImportError, AttributeError):
            # __import__ does not fail for a name missing from the
            # module; the getattr does, with AttributeError.
            raise ValueError("Invalid interface name '%(interface)s'"
                             % {'interface': interface})

    if not issubclass(interface, pywikibot.site.BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    user = pywikibot.tools.normalize_username(user)
    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
        debug(
            u"Instantiated %s object '%s'" % (interface.__name__, _sites[key]),
            _logger)

        if _sites[key].code != code:
            warn(
                'Site %s instantiated using different code "%s"' %
                (_sites[key], code), UserWarning, 2)

    return _sites[key]
def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None):
    """A factory method to obtain a Site object.

    Site objects are cached and reused by this method.

    By default rely on config settings. These defaults may all be overridden
    using the method parameters.

    @param code: language code (override config.mylang)
    @type code: string
    @param fam: family name or object (override config.family)
    @type fam: string or Family
    @param user: bot user name to use on this site (override config.usernames)
    @type user: unicode
    @param sysop: sysop user to use on this site (override config.sysopnames)
    @type sysop: unicode
    @param interface: site class or name of class in pywikibot.site
        (override config.site_interface)
    @type interface: subclass of L{pywikibot.site.BaseSite} or string
    @param url: Instead of code and fam, does try to get a Site based on the
        URL. Still requires that the family supporting that URL exists.
    @type url: string
    @return: the cached or newly created site object
    @raises ValueError: url was combined with code or fam, or interface
        names no class in pywikibot.site
    @raises Error: no configured family matches url
    """
    # Either code and fam or only url. Raised explicitly because an
    # assert would be stripped when Python runs with -O.
    if url and (code or fam):
        raise ValueError('URL to the wiki OR a pair of code and family name '
                         'should be provided')
    _logger = "wiki"

    if url:
        if url in _url_cache:
            cached = _url_cache[url]
            if cached:
                code = cached[0]
                fam = cached[1]
            else:
                raise Error("Unknown URL '{0}'.".format(url))
        else:
            # Iterate through all families and look, which does apply to
            # the given URL
            for fam in config.family_files:
                family = pywikibot.family.Family.load(fam)
                code = family.from_url(url)
                if code:
                    _url_cache[url] = (code, fam)
                    break
            else:
                _url_cache[url] = None
                # TODO: As soon as AutoFamily is ready, try and use an
                #       AutoFamily
                raise Error("Unknown URL '{0}'.".format(url))
    else:
        # Fallback to config defaults
        code = code or config.mylang
        fam = fam or config.family
    interface = interface or config.site_interface

    # config.usernames is initialised with a dict for each family name
    family_name = str(fam)
    if family_name in config.usernames:
        user = user or config.usernames[family_name].get(code) \
            or config.usernames[family_name].get('*')
        sysop = sysop or config.sysopnames[family_name].get(code) \
            or config.sysopnames[family_name].get('*')

    if not isinstance(interface, type):
        # If it isn't a class, assume it is a string
        try:
            tmp = __import__('pywikibot.site', fromlist=[interface])
            interface = getattr(tmp, interface)
        except (ImportError, AttributeError):
            # __import__ does not fail for a name missing from the
            # module; the getattr does, with AttributeError.
            raise ValueError("Invalid interface name '%(interface)s'"
                             % {'interface': interface})

    if not issubclass(interface, pywikibot.site.BaseSite):
        warning('Site called with interface=%s' % interface.__name__)

    key = '%s:%s:%s:%s' % (interface.__name__, fam, code, user)
    if key not in _sites or not isinstance(_sites[key], interface):
        _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
        debug(u"Instantiated %s object '%s'"
              % (interface.__name__, _sites[key]), _logger)
    return _sites[key]
def on_connect(self):
    """Send the 'subscribe' command for the listener thread's wiki host."""
    host = thread.wikihost
    debug('Connected to %r; subscribing to %s' % (thread, host), _logger)
    self.emit('subscribe', host)
    subscribed = 'Subscribed to %s' % host
    debug(subscribed, _logger)
def on_reconnect(self):
    """Report the reconnect and re-run the on_connect() subscription."""
    note = 'Reconnected to %r' % (thread, )
    debug(note, _logger)
    self.on_connect()