def test_inclusion(app, client):
    wanted_urls = {'/', '/page-a'}
    crawler = Crawler(client=client,
                      initial_paths=['/'],
                      rules=[
                          Rule(ANCHOR, '^/$', GET, Request()),
                          Rule(ANCHOR, '^/page-a$', GET, Request())
                      ])
    crawler.crawl()
    assert crawler.graph.visited_paths == wanted_urls
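
# Hedged companion sketch (added; not part of the original tests): the same
# crawler.graph.visited_paths set can also be used to check that a path matched
# only by an Ignore() rule is never requested. '/page-a' is the route already
# exercised by test_inclusion above.
def test_exclusion_sketch(app, client):
    crawler = Crawler(client=client,
                      initial_paths=['/'],
                      rules=[
                          Rule(ANCHOR, '^/$', GET, Request()),
                          Rule(ANCHOR, '^/page-a$', GET, Ignore()),
                      ])
    crawler.crawl()
    assert '/page-a' not in crawler.graph.visited_paths
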
def test_50x_trapped_but_allowed(app, client):
    crawler = Crawler(client=client,
                      initial_paths=['/'],
                      rules=[
                          Rule(ANCHOR, ".*", GET, Request()),
                          Rule(ANCHOR, ".*", GET, Allow([400, 500])),
                      ])
    crawler.crawl()


def test_50x_raising_exception(app, client):
    crawler = Crawler(client=client,
                      initial_paths=['/'],
                      capture_exceptions=False,
                      rules=[
                          Rule(ANCHOR, ".*", GET, Request()),
                          Rule(ANCHOR, ".*", GET, Allow([400]))
                      ])
    with pytest.raises(HttpStatusError) as excinfo:
        crawler.crawl()
    assert excinfo.value.status_code == 500


def test_submit_forms_with_extra_data(app, client):
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        rules=(PERMISSIVE_HYPERLINKS_ONLY_RULE_SET +
               SUBMIT_POST_FORMS_RULE_SET + [
                   Rule(FORM, ".*", GET, Request(params={'extra': 'extra'})),
                   Rule(FORM, ".*", POST, Request(params={'extra': 'extra'})),
               ]))
    crawler.crawl()

    # check that whenever a form submission carried params, the extra data was included
    submitted_forms = [
        form for form in crawler.graph.get_nodes_by_source(FORM)
        if form.requested
    ]
    assert len(submitted_forms) > 1
    for form in submitted_forms:
        entries = lookup_requests(app, form.path, method=form.method)
        for entry in entries:
            if entry.params:
                assert 'extra' in {key for key, val in entry.params}
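
# Hypothetical sketch (added; not the original helper): `lookup_requests` above is assumed
# to return the requests the test app recorded for a given path/method. One simple shape
# such a fixture could take is a list of (path, method, params) entries kept on the app;
# `app.request_log` and `RequestEntry` are illustrative names, not part of the real suite.
from collections import namedtuple

RequestEntry = namedtuple('RequestEntry', ['path', 'method', 'params'])

def lookup_requests_sketch(app, path, method=None):
    # Filter the recorded entries down to the given path (and method, if one was supplied).
    return [
        entry for entry in getattr(app, 'request_log', [])
        if entry.path == path and (method is None or entry.method == method)
    ]
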
Example #5
def test_other_crawl(client, auth):
    auth.login()
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        rules=(
            ALL_ELEMENTS_RULE_SET + SUBMIT_GET_FORMS_RULE_SET +
            SUBMIT_POST_FORMS_RULE_SET + [
                # don't log out
                Rule(".*", r"/auth/logout", GET, Ignore()),

                # submit some data to create
                Rule(".*", r"/create", POST,
                     Request(params={
                         "title": "A Title",
                         "body": "body text"
                     })),

                # add the missing body when updating
                Rule(".*", r"/\d+/update", POST,
                     Request(params={"body": "updated body"})),
            ]),
    )
    crawler.crawl()
Example #6
from python_testing_crawler import (
    Crawler,
    Rule,
    Request,
    Ignore,
    Allow,
)

GET = "GET"
POST = "POST"

# Each Rule pairs an element-source pattern and a path pattern with an HTTP method and an action.
ALL_ELEMENTS_RULE_SET = [Rule('.*', '/.*', GET, Request())]
SUBMIT_GET_FORMS_RULE_SET = [Rule("form", '.*', GET, Request())]
SUBMIT_POST_FORMS_RULE_SET = [Rule("form", '.*', POST, Request())]
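
# Hedged illustration (added; not part of the original module): these broad rule sets can
# be extended with narrower rules when a particular path needs different handling. The
# '/slow-report' path below is an assumed example, not a route from the tested app.
SKIP_SLOW_REPORT_RULE_SET = ALL_ELEMENTS_RULE_SET + [
    Rule('.*', r'/slow-report', GET, Ignore()),
]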


def _crawl(client):
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        rules=(
            ALL_ELEMENTS_RULE_SET + SUBMIT_GET_FORMS_RULE_SET +
            SUBMIT_POST_FORMS_RULE_SET + [
                # don't log out
                Rule(".*", r"/auth/logout", GET, Ignore()),
                # allow 400 on create and update
                Rule(".*", r"/create", POST, Allow([400])),
                Rule(".*", r"/\d+/update", POST, Allow([400])),
            ]),
    )
    crawler.crawl()
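
# Minimal usage sketch (added; not in the original file): the `_crawl` helper above would
# typically be called from tests that use the same `client`/`auth` fixtures seen in
# test_other_crawl, for example crawling the site as a logged-in user:
def test_crawl_while_logged_in(client, auth):
    auth.login()
    _crawl(client)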