Code Example #1
File: tests.py  Project: shezadkhan137/pylinkchecker
    def _run_crawler_plain(self, crawler_class, other_options=None):
        """Parse a simulated command line, run crawler_class and return the
        crawled site."""
        url = self.get_url("/index.html")
        # Config.parse_config() reads sys.argv, so fake a CLI invocation.
        sys.argv = ['pylinkchecker', "-m", "process", url]
        if not other_options:
            other_options = []
        sys.argv.extend(other_options)
        config = Config()
        config.parse_config()

        crawler = crawler_class(config, get_logger())
        crawler.crawl()

        return crawler.site
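
For context, a test would presumably call this helper with one of the crawler classes shown in example #3 and assert on the returned site. A minimal sketch, with a hypothetical test name and using ProcessSiteCrawler to match the hard-coded "-m process" mode:

    def test_process_crawler_smoke(self):
        # Hypothetical test; site.is_ok is the same flag used in example #3.
        site = self._run_crawler_plain(ProcessSiteCrawler)
        self.assertTrue(site.is_ok)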
Code Example #2
File: tests.py  Project: shezadkhan137/pylinkchecker
    def test_accepted_hosts(self):
        # Without -H, the start URL's host is accepted.
        sys.argv = ['pylinkchecker', 'http://www.example.com/']
        config = Config()
        config.parse_config()
        self.assertTrue('www.example.com' in config.accepted_hosts)

        # Hosts given via -H and the hosts of every start URL should all be
        # accepted.
        sys.argv = ['pylinkchecker', '-H', 'www.example.com',
                'http://example.com', 'foo.com', 'http://www.example.com/',
                'baz.com']
        config = Config()
        config.parse_config()

        self.assertTrue('www.example.com' in config.accepted_hosts)
        self.assertTrue('example.com' in config.accepted_hosts)
        self.assertTrue('foo.com' in config.accepted_hosts)
        self.assertTrue('baz.com' in config.accepted_hosts)
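
Both tests rely on Config.parse_config() reading sys.argv directly, which is why each one replaces sys.argv before building a Config. A minimal sketch of the same parse outside a test, assuming (as the flag's use above suggests) that a host passed with -H is added to accepted_hosts even when it is not a start URL:

import sys

sys.argv = ['pylinkchecker', '-H', 'cdn.example.org',
            'http://www.example.com/']
config = Config()
config.parse_config()

# Expect both the -H host and the start URL's host in accepted_hosts.
assert 'cdn.example.org' in config.accepted_hosts
assert 'www.example.com' in config.accepted_hosts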
Code Example #3
def execute_from_command_line():
    start = time.time()
    config = Config()
    config.parse_config()

    if not config.start_urls:
        print("At least one starting URL must be supplied.")
        sys.exit(1)

    if config.options.verbose == VERBOSE_QUIET:
        logging.basicConfig(level=logging.CRITICAL)
    elif config.options.verbose == VERBOSE_NORMAL:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.DEBUG)

    logger = get_logger()

    # Initialize so an unknown mode falls through to the error below instead
    # of raising NameError.
    crawler = None
    if config.options.mode == MODE_THREAD:
        crawler = ThreadSiteCrawler(config, logger)
    elif config.options.mode == MODE_PROCESS:
        crawler = ProcessSiteCrawler(config, logger)
    elif config.options.mode == MODE_GREEN:
        crawler = GreenSiteCrawler(config, logger)

    if not crawler:
        print("Invalid crawling mode supplied.")
        sys.exit(1)

    crawler.crawl()

    stop = time.time()

    # Report when problems were found, or unconditionally if so configured.
    if not crawler.site.is_ok or config.options.when == WHEN_ALWAYS:
        report(crawler.site, config, stop - start, logger)

    # A site with errors yields a non-zero exit status.
    if not crawler.site.is_ok:
        sys.exit(1)
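
execute_from_command_line() reads everything from sys.argv, so a console launcher only needs to call it. A minimal sketch; the import path is an assumption, not taken from the listing:

# Hypothetical launcher script; the module path below is assumed.
from pylinkchecker.crawler import execute_from_command_line

if __name__ == '__main__':
    execute_from_command_line()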