def _run_crawler_plain(self, crawler_class, other_options=None):
    """Crawl the local /index.html fixture and return the resulting site.

    Builds a fake ``pylinkchecker`` command line in process mode (plus any
    extra options), parses it into a Config, runs the given crawler class,
    and returns the crawled site object for assertions.
    """
    # Simulate a command-line invocation; Config reads sys.argv directly.
    sys.argv = ['pylinkchecker', "-m", "process", self.get_url("/index.html")]
    sys.argv.extend(other_options or [])

    config = Config()
    config.parse_config()

    crawler = crawler_class(config, get_logger())
    crawler.crawl()

    return crawler.site
def test_accepted_hosts(self):
    """Hosts from -H, positional URLs, and bare names all reach accepted_hosts."""
    # A single start URL: its host must be accepted implicitly.
    sys.argv = ['pylinkchecker', 'http://www.example.com/']
    config = Config()
    config.parse_config()
    # assertIn (instead of assertTrue(x in y)) reports both operands on failure.
    self.assertIn('www.example.com', config.accepted_hosts)

    # Mixed sources: an explicit -H host, full URLs, and bare host names.
    sys.argv = ['pylinkchecker', '-H', 'www.example.com',
                'http://example.com', 'foo.com',
                'http://www.example.com/', 'baz.com']
    config = Config()
    config.parse_config()
    for host in ('www.example.com', 'example.com', 'foo.com', 'baz.com'):
        self.assertIn(host, config.accepted_hosts)
def execute_from_command_line():
    """CLI entry point: parse config, crawl, report, and set the exit code.

    Exits with status 1 when no start URL is supplied, when the crawling
    mode is invalid, or when the crawled site contains errors.
    """
    start = time.time()
    config = Config()
    config.parse_config()

    if not config.start_urls:
        print("At least one starting URL must be supplied.")
        sys.exit(1)

    # Map the verbosity option onto a logging level.
    if config.options.verbose == VERBOSE_QUIET:
        logging.basicConfig(level=logging.CRITICAL)
    elif config.options.verbose == VERBOSE_NORMAL:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.DEBUG)

    logger = get_logger()

    # Bug fix: initialize crawler so an unrecognized mode falls through to
    # the error branch below instead of raising UnboundLocalError on
    # "if not crawler".
    crawler = None
    if config.options.mode == MODE_THREAD:
        crawler = ThreadSiteCrawler(config, logger)
    elif config.options.mode == MODE_PROCESS:
        crawler = ProcessSiteCrawler(config, logger)
    elif config.options.mode == MODE_GREEN:
        crawler = GreenSiteCrawler(config, logger)

    if not crawler:
        print("Invalid crawling mode supplied.")
        sys.exit(1)

    crawler.crawl()
    stop = time.time()

    # Report on failure always; on success only when configured to.
    if not crawler.site.is_ok or config.options.when == WHEN_ALWAYS:
        report(crawler.site, config, stop - start, logger)

    # Non-zero exit code signals broken links to calling scripts/CI.
    if not crawler.site.is_ok:
        sys.exit(1)