Example #1
0
    def __init__(self, HTTPignore=None, day=7, **kwargs):
        """Set up the link history, the ignore list and the thread pool.

        :param HTTPignore: stored as ``self.HTTPignore``; any falsy value
            (including the default ``None``) is replaced by a new empty
            list. Presumably HTTP status codes to ignore -- confirm
            against the code that consumes the attribute.
        :param day: stored unchanged as ``self.day``. Presumably a number
            of days -- confirm against the code that consumes it.
        :param kwargs: passed through untouched to the parent initializer.
        """
        super().__init__(**kwargs)

        # The reporting thread is optional; History receives None when
        # talk page reporting is switched off in the configuration.
        report_thread = None
        if config.report_dead_links_on_talk:
            pywikibot.log('Starting talk page thread')
            report_thread = DeadLinkReportThread()
            report_thread.start()

        self.history = History(report_thread, site=self.site)
        self.day = day
        self.HTTPignore = HTTPignore if HTTPignore else []

        # Cap how many threads may be started at the same time.
        max_links = config.max_external_links
        retry_delay = config.retry_wait
        self.threads = ThreadList(limit=max_links, wait_time=retry_delay)
Example #2
0
    def __init__(self, generator, HTTPignore=None, day=7, site=True):
        """Set up the link history, the ignore list and the thread pool.

        :param generator: forwarded to the parent initializer as
            ``generator``.
        :param HTTPignore: stored as ``self.HTTPignore``; any falsy value
            (including the default ``None``) is replaced by a new empty
            list. Presumably HTTP status codes to ignore -- confirm
            against the code that consumes the attribute.
        :param day: stored unchanged as ``self.day``. Presumably a number
            of days -- confirm against the code that consumes it.
        :param site: forwarded to the parent initializer as ``site``.
        """
        super().__init__(generator=generator, site=site)

        # The reporting thread is optional; History receives None when
        # talk page reporting is switched off in the configuration.
        report_thread = None
        if config.report_dead_links_on_talk:
            pywikibot.log('Starting talk page thread')
            report_thread = DeadLinkReportThread()
            # Note: the thread is not marked as a daemon here before it
            # is started.
            report_thread.start()

        self.history = History(report_thread, site=self.site)
        self.day = day
        self.HTTPignore = HTTPignore if HTTPignore else []

        # Cap how many threads may be started at the same time.
        max_links = config.max_external_links
        retry_delay = config.retry_wait
        self.threads = ThreadList(limit=max_links, wait_time=retry_delay)
Example #3
0
class WeblinkCheckerRobot(SingleSiteBot, ExistingPageBot):

    """
    Bot which will search for dead weblinks.

    It uses several LinkCheckThreads at once to process pages from generator.
    """

    def __init__(self, generator, HTTPignore=None, day=7, site=True):
        """Initializer.

        :param generator: forwarded to the parent initializer as
            ``generator``.
        :param HTTPignore: handed to every LinkCheckThread; any falsy
            value (including the default ``None``) is replaced by a new
            empty list.
        :param day: handed unchanged to every LinkCheckThread.
        :param site: forwarded to the parent initializer as ``site``.
        """
        super().__init__(generator=generator, site=site)

        if config.report_dead_links_on_talk:
            pywikibot.log('Starting talk page thread')
            reportThread = DeadLinkReportThread()
            # Note: the report thread is not marked as a daemon here.
            reportThread.start()
        else:
            # History is created without a reporting thread.
            reportThread = None
        self.history = History(reportThread, site=self.site)
        self.HTTPignore = HTTPignore or []
        self.day = day

        # Limit the number of threads started at the same time
        self.threads = ThreadList(limit=config.max_external_links,
                                  wait_time=config.retry_wait)

    def treat_page(self):
        """Process one page.

        Every URL yielded by ``weblinksIn`` for the current page's text is
        tested against the patterns in ``ignorelist``; for each URL that
        no pattern matches, a daemon LinkCheckThread is created and
        appended to ``self.threads``.
        """
        page = self.current_page
        for url in weblinksIn(page.text):
            # Skip URLs matched by any ignore pattern; any() stops at the
            # first hit, just like an explicit loop with ``break``.
            if any(ignoreR.match(url) for ignoreR in ignorelist):
                continue

            # Each thread will check one page, then die.
            thread = LinkCheckThread(page, url, self.history,
                                     self.HTTPignore, self.day)
            # thread dies when program terminates; the ``daemon``
            # attribute replaces setDaemon(), deprecated since Python 3.10
            thread.daemon = True
            self.threads.append(thread)