Esempio n. 1
0
    def _run_crawler_plain(self, crawler_class, other_options=None):
        """Crawl the test server's ``/index.html`` and return the crawled site.

        The command line is simulated by overwriting ``sys.argv`` with
        ``pylinkchecker -m process <url>`` followed by ``other_options``
        (when given), which ``Config.parse_cli_config()`` then parses.

        ``crawler_class`` is called with the parsed config and a logger to
        build the crawler. The return value is the ``site`` attribute of that
        crawler after ``crawl()`` has run.

        Note: ``sys.argv`` is left modified after this method returns.
        """
        start_url = self.get_url("/index.html")

        sys.argv = ['pylinkchecker', "-m", "process", start_url]
        if other_options:
            sys.argv.extend(other_options)

        cli_config = Config()
        cli_config.parse_cli_config()

        runner = crawler_class(cli_config, get_logger())
        runner.crawl()
        return runner.site
Esempio n. 2
0
    def _run_crawler_plain(self, crawler_class, other_options=None):
        """Crawl the test server's ``/index.html`` and return the crawled site.

        The command line is simulated by overwriting ``sys.argv`` with
        ``pylinkchecker -m process <url>`` followed by ``other_options``
        (when given), which ``Config.parse_cli_config()`` then parses.

        ``crawler_class`` is called with the parsed config and a logger to
        build the crawler. The return value is the ``site`` attribute of that
        crawler after ``crawl()`` has run.

        Note: ``sys.argv`` is left modified after this method returns.
        """
        start_url = self.get_url("/index.html")

        sys.argv = ['pylinkchecker', "-m", "process", start_url]
        if other_options:
            sys.argv.extend(other_options)

        cli_config = Config()
        cli_config.parse_cli_config()

        runner = crawler_class(cli_config, get_logger())
        runner.crawl()
        return runner.site
Esempio n. 3
0
    def test_accepted_hosts(self):
        """Check the contents of ``config.accepted_hosts`` after CLI parsing."""
        # Case 1: a single start URL; its host must be accepted.
        sys.argv = ['pylinkchecker', 'http://www.example.com/']
        single_url_config = Config()
        single_url_config.parse_cli_config()
        self.assertTrue(
            'www.example.com' in single_url_config.accepted_hosts)

        # Case 2: a -H option followed by a mix of full URLs and bare host
        # names; every one of the four hosts must be accepted.
        sys.argv = [
            'pylinkchecker', '-H', 'www.example.com',
            'http://example.com', 'foo.com', 'http://www.example.com/',
            'baz.com',
        ]
        mixed_args_config = Config()
        mixed_args_config.parse_cli_config()

        expected_hosts = (
            'www.example.com',
            'example.com',
            'foo.com',
            'baz.com',
        )
        for host in expected_hosts:
            self.assertTrue(host in mixed_args_config.accepted_hosts)
Esempio n. 4
0
    def test_accepted_hosts(self):
        """Check the contents of ``config.accepted_hosts`` after CLI parsing."""
        # Case 1: a single start URL; its host must be accepted.
        sys.argv = ['pylinkchecker', 'http://www.example.com/']
        single_url_config = Config()
        single_url_config.parse_cli_config()
        self.assertTrue(
            'www.example.com' in single_url_config.accepted_hosts)

        # Case 2: a -H option followed by a mix of full URLs and bare host
        # names; every one of the four hosts must be accepted.
        sys.argv = [
            'pylinkchecker', '-H', 'www.example.com',
            'http://example.com', 'foo.com', 'http://www.example.com/',
            'baz.com',
        ]
        mixed_args_config = Config()
        mixed_args_config.parse_cli_config()

        expected_hosts = (
            'www.example.com',
            'example.com',
            'foo.com',
            'baz.com',
        )
        for host in expected_hosts:
            self.assertTrue(host in mixed_args_config.accepted_hosts)
Esempio n. 5
0
def execute_from_command_line():
    """Run the crawler with a configuration parsed from the command line.

    A report is produced when the crawled site is not ok, or when the
    ``when`` option equals ``WHEN_ALWAYS``. The elapsed time passed to the
    report covers configuration parsing, logger setup and the crawl itself.

    The process exits with status 1 when the site is not ok, or when any
    ``Exception`` is raised (the exception is printed first).
    """
    try:
        began = time.time()

        config = Config()
        config.parse_cli_config()
        logger = configure_logger(config)

        crawler = execute_from_config(config, logger)
        elapsed = time.time() - began

        if not crawler.site.is_ok or config.options.when == WHEN_ALWAYS:
            report(crawler.site, config, elapsed, logger)

        # sys.exit raises SystemExit, which is not an Exception subclass, so
        # the handler below does not intercept it.
        if not crawler.site.is_ok:
            sys.exit(1)
    except Exception as error:
        print(error)
        sys.exit(1)
Esempio n. 6
0
def execute_from_command_line():
    """Run the crawler with a configuration parsed from the command line.

    A report is produced when the crawled site is not ok, or when the
    ``when`` option equals ``WHEN_ALWAYS``. The elapsed time passed to the
    report covers configuration parsing, logger setup and the crawl itself.

    The process exits with status 1 when the site is not ok, or when any
    ``Exception`` is raised (the exception is printed first).
    """
    try:
        began = time.time()

        config = Config()
        config.parse_cli_config()
        logger = configure_logger(config)

        crawler = execute_from_config(config, logger)
        elapsed = time.time() - began

        if not crawler.site.is_ok or config.options.when == WHEN_ALWAYS:
            report(crawler.site, config, elapsed, logger)

        # sys.exit raises SystemExit, which is not an Exception subclass, so
        # the handler below does not intercept it.
        if not crawler.site.is_ok:
            sys.exit(1)
    except Exception as error:
        print(error)
        sys.exit(1)