def _run_crawler_plain(self, crawler_class, other_options=None):
    """Crawl ``/index.html`` of the test server with ``crawler_class``.

    A command line of the form
    ``pylinkchecker -m process <url> [other_options...]`` is installed as
    ``sys.argv`` before ``Config.parse_cli_config()`` is called. The
    resulting config and the logger returned by ``get_logger()`` are then
    handed to ``crawler_class`` and ``crawl()`` is run.

    Args:
        crawler_class: Callable invoked as ``crawler_class(config, logger)``;
            the returned object must provide ``crawl()`` and ``site``.
        other_options: Optional iterable of extra command-line arguments
            appended after the URL. ``None`` or an empty value adds nothing.

    Returns:
        The ``site`` attribute of the crawler once ``crawl()`` has returned.
    """
    url = self.get_url("/index.html")
    argv = ['pylinkchecker', "-m", "process", url]
    if other_options:
        argv.extend(other_options)

    # sys.argv is process-wide state: put the previous value back when done
    # so the fake command line does not leak into whatever runs next.
    saved_argv = sys.argv
    sys.argv = argv
    try:
        config = Config()
        config.parse_cli_config()

        crawler = crawler_class(config, get_logger())
        crawler.crawl()
        return crawler.site
    finally:
        sys.argv = saved_argv
def _run_crawler_plain(self, crawler_class, other_options=None):
    """Crawl ``/index.html`` of the test server with ``crawler_class``.

    A command line of the form
    ``pylinkchecker -m process <url> [other_options...]`` is installed as
    ``sys.argv`` before ``Config.parse_cli_config()`` is called. The
    resulting config and the logger returned by ``get_logger()`` are then
    handed to ``crawler_class`` and ``crawl()`` is run.

    Args:
        crawler_class: Callable invoked as ``crawler_class(config, logger)``;
            the returned object must provide ``crawl()`` and ``site``.
        other_options: Optional iterable of extra command-line arguments
            appended after the URL. ``None`` or an empty value adds nothing.

    Returns:
        The ``site`` attribute of the crawler once ``crawl()`` has returned.
    """
    url = self.get_url("/index.html")
    argv = ['pylinkchecker', "-m", "process", url]
    if other_options:
        argv.extend(other_options)

    # sys.argv is process-wide state: put the previous value back when done
    # so the fake command line does not leak into whatever runs next.
    saved_argv = sys.argv
    sys.argv = argv
    try:
        config = Config()
        config.parse_cli_config()

        crawler = crawler_class(config, get_logger())
        crawler.crawl()
        return crawler.site
    finally:
        sys.argv = saved_argv
def test_accepted_hosts(self):
    """Check the contents of ``config.accepted_hosts`` after CLI parsing.

    Two command lines are parsed. The first has a single start URL. The
    second passes ``-H www.example.com`` followed by several positional
    arguments, some written as full URLs and some as bare host names. In
    both cases the expected host names must be members of
    ``config.accepted_hosts``.
    """
    # sys.argv is process-wide state: restore it so this test does not
    # leave its fake command line behind.
    saved_argv = sys.argv
    try:
        sys.argv = ['pylinkchecker', 'http://www.example.com/']
        config = Config()
        config.parse_cli_config()
        # assertIn reports both the member and the container on failure,
        # unlike assertTrue(x in y), which only says "False is not true".
        self.assertIn('www.example.com', config.accepted_hosts)

        sys.argv = ['pylinkchecker', '-H', 'www.example.com',
                    'http://example.com', 'foo.com',
                    'http://www.example.com/', 'baz.com']
        config = Config()
        config.parse_cli_config()
        for host in ('www.example.com', 'example.com', 'foo.com', 'baz.com'):
            self.assertIn(host, config.accepted_hosts)
    finally:
        sys.argv = saved_argv
def test_accepted_hosts(self):
    """Check the contents of ``config.accepted_hosts`` after CLI parsing.

    Two command lines are parsed. The first has a single start URL. The
    second passes ``-H www.example.com`` followed by several positional
    arguments, some written as full URLs and some as bare host names. In
    both cases the expected host names must be members of
    ``config.accepted_hosts``.
    """
    # sys.argv is process-wide state: restore it so this test does not
    # leave its fake command line behind.
    saved_argv = sys.argv
    try:
        sys.argv = ['pylinkchecker', 'http://www.example.com/']
        config = Config()
        config.parse_cli_config()
        # assertIn reports both the member and the container on failure,
        # unlike assertTrue(x in y), which only says "False is not true".
        self.assertIn('www.example.com', config.accepted_hosts)

        sys.argv = ['pylinkchecker', '-H', 'www.example.com',
                    'http://example.com', 'foo.com',
                    'http://www.example.com/', 'baz.com']
        config = Config()
        config.parse_cli_config()
        for host in ('www.example.com', 'example.com', 'foo.com', 'baz.com'):
            self.assertIn(host, config.accepted_hosts)
    finally:
        sys.argv = saved_argv
def execute_from_command_line():
    """Run the crawler using the configuration parsed from the command line.

    Creates a ``Config``, calls ``parse_cli_config()`` on it, configures the
    logger and runs the crawl through ``execute_from_config``, timing the
    whole sequence. ``report`` is called when the crawled site is not OK or
    when ``config.options.when`` equals ``WHEN_ALWAYS``.

    The process exits with status 1 when the site is not OK, or when any
    ``Exception`` is raised along the way; in the latter case the error
    message is written to standard error first. Returns ``None`` otherwise.
    """
    try:
        start = time.time()
        config = Config()
        config.parse_cli_config()

        logger = configure_logger(config)
        crawler = execute_from_config(config, logger)

        stop = time.time()

        if not crawler.site.is_ok or config.options.when == WHEN_ALWAYS:
            report(crawler.site, config, stop - start, logger)

        if not crawler.site.is_ok:
            # SystemExit derives from BaseException, not Exception, so the
            # handler below does not intercept this exit.
            sys.exit(1)
    except Exception as e:
        # Diagnostics go to stderr, the conventional stream for errors,
        # instead of being mixed into standard output.
        sys.stderr.write("{0}\n".format(e))
        sys.exit(1)
def execute_from_command_line():
    """Run the crawler using the configuration parsed from the command line.

    Creates a ``Config``, calls ``parse_cli_config()`` on it, configures the
    logger and runs the crawl through ``execute_from_config``, timing the
    whole sequence. ``report`` is called when the crawled site is not OK or
    when ``config.options.when`` equals ``WHEN_ALWAYS``.

    The process exits with status 1 when the site is not OK, or when any
    ``Exception`` is raised along the way; in the latter case the error
    message is written to standard error first. Returns ``None`` otherwise.
    """
    try:
        start = time.time()
        config = Config()
        config.parse_cli_config()

        logger = configure_logger(config)
        crawler = execute_from_config(config, logger)

        stop = time.time()

        if not crawler.site.is_ok or config.options.when == WHEN_ALWAYS:
            report(crawler.site, config, stop - start, logger)

        if not crawler.site.is_ok:
            # SystemExit derives from BaseException, not Exception, so the
            # handler below does not intercept this exit.
            sys.exit(1)
    except Exception as e:
        # Diagnostics go to stderr, the conventional stream for errors,
        # instead of being mixed into standard output.
        sys.stderr.write("{0}\n".format(e))
        sys.exit(1)