def __init__(self, HTTPignore=None, day=7, **kwargs):
    """Initializer."""
    super().__init__(**kwargs)
    # Optional background thread that posts dead-link reports on
    # talk pages; only started when the config flag asks for it.
    report_thread = None
    if config.report_dead_links_on_talk:
        pywikibot.log('Starting talk page thread')
        report_thread = DeadLinkReportThread()
        report_thread.start()
    self.history = History(report_thread, site=self.site)
    self.HTTPignore = HTTPignore or []
    self.day = day
    # Cap how many checker threads may run at the same time.
    self.threads = ThreadList(limit=config.max_external_links,
                              wait_time=config.retry_wait)
def __init__(self, generator, HTTPignore=None, day=7, site=True):
    """Initializer."""
    super().__init__(generator=generator, site=site)
    # Start the talk-page reporting thread only if configured to do so.
    if config.report_dead_links_on_talk:
        pywikibot.log('Starting talk page thread')
        talk_thread = DeadLinkReportThread()
        talk_thread.start()
    else:
        talk_thread = None
    self.history = History(talk_thread, site=self.site)
    self.HTTPignore = HTTPignore or []
    self.day = day
    # Throttle: no more than max_external_links checker threads at once.
    self.threads = ThreadList(limit=config.max_external_links,
                              wait_time=config.retry_wait)
class WeblinkCheckerRobot(SingleSiteBot, ExistingPageBot):

    """Bot which will search for dead weblinks.

    It uses several LinkCheckThreads at once to process pages from
    generator.
    """

    def __init__(self, generator, HTTPignore=None, day=7, site=True):
        """Initializer.

        :param generator: page generator supplying the pages to work on
        :param HTTPignore: errors/patterns to be ignored when checking
            links; stored as-is and passed to each LinkCheckThread
            (defaults to an empty list)
        :param day: threshold, in days, passed on to each
            LinkCheckThread via ``self.day``
        :param site: the site the bot works on
        """
        super().__init__(generator=generator, site=site)
        if config.report_dead_links_on_talk:
            pywikibot.log('Starting talk page thread')
            reportThread = DeadLinkReportThread()
            reportThread.start()
        else:
            reportThread = None
        self.history = History(reportThread, site=self.site)
        self.HTTPignore = HTTPignore or []
        self.day = day
        # Limit the number of threads started at the same time
        self.threads = ThreadList(limit=config.max_external_links,
                                  wait_time=config.retry_wait)

    def treat_page(self):
        """Process one page: spawn a checker thread per external link."""
        page = self.current_page
        for url in weblinksIn(page.text):
            for ignoreR in ignorelist:
                if ignoreR.match(url):
                    break
            else:
                # Each thread will check one page, then die.
                thread = LinkCheckThread(page, url, self.history,
                                         self.HTTPignore, self.day)
                # Daemonize so the thread dies when the program
                # terminates. Thread.setDaemon() is deprecated since
                # Python 3.10 and removed in 3.13; assign the attribute.
                thread.daemon = True
                self.threads.append(thread)