Example #1
async def test_qs_limit():
    """Check that setting ``Explorer.qs_limit`` reduces the explored resources.

    The same start URL is explored twice with a fresh crawler each time:
    first with the explorer's default settings (4 resources expected), then
    with ``qs_limit`` set to 500 (3 resources expected).
    """
    crawler = AsyncCrawler("")
    try:
        explorer = Explorer(crawler, Event())
        start_urls = deque([""])
        excluded_urls = []
        # We should have root url, huge form page, target and target with POST method
        explored = [resource async for resource in explorer.async_explore(start_urls, excluded_urls)]
        assert len(explored) == 4
    finally:
        # Close the crawler even when the assertion fails so it is not leaked.
        await crawler.close()

    crawler = AsyncCrawler("")
    try:
        explorer = Explorer(crawler, Event())
        # Exclude huge POST form with limit of parameters
        explorer.qs_limit = 500
        start_urls = deque([""])
        excluded_urls = []
        # With the limit in place the huge POST form request is expected to be
        # dropped, leaving one resource fewer than in the unrestricted run.
        explored = [resource async for resource in explorer.async_explore(start_urls, excluded_urls)]
        assert len(explored) == 3
    finally:
        await crawler.close()
Example #2
async def test_explorer_filtering():
    """Check which URLs ``Explorer.async_explore`` yields for the start page.

    The URLs of all yielded resources are collected into a set and compared
    against the expected set.
    """
    crawler = AsyncCrawler("")
    try:
        explorer = Explorer(crawler, Event())
        start_urls = deque([""])
        excluded_urls = []
        # A set comprehension deduplicates URLs without building a throwaway list.
        results = {
            resource.url
            async for resource in explorer.async_explore(start_urls, excluded_urls)
        }
        # We should have current URL and JS URL but without query string.
        # CSS URL should be excluded
        assert results == {"", ""}
    finally:
        # Close the crawler even when the assertion fails so it is not leaked.
        await crawler.close()