Beispiel #1
0
def too_soon_response(too_soon_urls):
    """Yield a ``RequestsMock`` primed for a "too soon" save-page exchange.

    Two canned responses are registered from the ``job-save-too-soon``
    fixture directory:

    * the POST that submits the first URL in *too_soon_urls* for capture,
    * the GET that polls the job whose id is extracted from that
      submission body (answered with ``status-0.json``).

    The mock is yielded from inside its ``with`` block, so it stays active
    until the consumer resumes or closes this generator.

    NOTE(review): this is shaped like a pytest yield-fixture, but no
    decorator is visible in this excerpt -- confirm how it is registered.
    """
    url = too_soon_urls[0]

    fixture_dir = FIXTURES_DIR.joinpath("job-save-too-soon")
    submit_html = fixture_dir.joinpath("submit-response.html").read_text()

    with responses.RequestsMock() as mock:
        # Submission: only matches when the form carries exactly these fields.
        save_url = wb.url_for_savepage(url)
        form_matcher = responses.urlencoded_params_matcher(
            {"url": url, "capture_all": "on"}
        )
        mock.add("POST", save_url, body=submit_html, status=200, match=[form_matcher])

        # Polling: the status URL is derived from the job id in the submit body.
        job_status_url = wb.url_for_jobstatus(wb.extract_job_id(submit_html))
        status_json = fixture_dir.joinpath("status-0.json").read_text()
        mock.add("GET", job_status_url, body=status_json, status=200)

        yield mock
def test_snapshot_too_soon():
    """``wb.snapshot`` reports a too-soon capture rather than a new snapshot.

    The save-page POST and the job-status GET are answered from the
    ``job-save-too-soon`` fixture directory.

    NOTE(review): responses are registered on the module-level ``responses``
    mock, so this presumably runs under ``@responses.activate`` -- the
    decorator is not visible in this excerpt.
    """
    fixture_dir = FIXTURES_DIR.joinpath("job-save-too-soon")
    url = "https://plainlanguage.gov/"

    submit_html = fixture_dir.joinpath("submit-response.html").read_text()

    # Canned reply to the capture submission; the form fields must match.
    save_url = wb.url_for_savepage(url)
    form_matcher = responses.urlencoded_params_matcher(
        {"url": url, "capture_all": "on"}
    )
    responses.add("POST", save_url, body=submit_html, status=200, match=[form_matcher])

    # Canned reply to the poll of the job named in the submission body.
    job_status_url = wb.url_for_jobstatus(wb.extract_job_id(submit_html))
    status_json = fixture_dir.joinpath("status-0.json").read_text()
    responses.add("GET", job_status_url, body=status_json, status=200)

    answer, meta = wb.snapshot(url)

    assert answer == meta.snapshot_url
    assert meta.subcommand == "snapshot"
    assert meta.was_new_snapshot_created() is False

    expected_notice = (
        "The same snapshot had been made 4 minutes and 18 seconds ago. "
        "We only allow new captures of the same URL every 20 minutes."
    )
    assert meta.too_soon() == expected_notice
def test_save_job_polling():
    """``wb.fetch_job_status`` yields a ``"success"`` status for the canned job.

    The job-status URL for a fixed job id is mocked with the contents of
    ``job-status-success.json``.
    """
    job_id = "af709a09-c909-4883-b6b3-7350f9be8d7c"
    status_url = wb.url_for_jobstatus(job_id)
    canned_json = FIXTURES_DIR.joinpath("job-status-success.json").read_text()
    responses.add("GET", status_url, body=canned_json)

    payload = wb.fetch_job_status(job_id)
    assert payload["status"] == "success"
Beispiel #4
0
def save_success_response(success_urls):
    """Yield a ``RequestsMock`` primed for a successful save-and-poll exchange.

    Two handlers are registered from the ``job-save-success`` fixture
    directory:

    * the POST that submits the first URL in *success_urls* for capture,
    * a GET callback on the job-status URL that serves ``status-0.json``,
      ``status-1.json``, ``status-9.json`` and ``status-10.json`` in that
      order, one file per poll.

    The mock is yielded from inside its ``with`` block, so it stays active
    until the consumer resumes or closes this generator.

    NOTE(review): this is shaped like a pytest yield-fixture, but no
    decorator is visible in this excerpt -- confirm how it is registered.
    """
    srcdir = FIXTURES_DIR.joinpath("job-save-success")
    target_url = success_urls[0]

    submit_resptext = srcdir.joinpath("submit-response.html").read_text()
    # The status URL must be built from the job id embedded in the submit
    # response, not from the raw HTML body itself; otherwise the mocked GET
    # never matches the URL that is actually polled.
    expected_job_id = wb.extract_job_id(submit_resptext)
    expected_job_url = wb.url_for_jobstatus(expected_job_id)

    # One status document per poll; the iterator is shared with the callback
    # below, so a poll beyond the last file raises StopIteration there.
    status_paths = iter(
        [
            srcdir.joinpath("status-0.json"),
            srcdir.joinpath("status-1.json"),
            srcdir.joinpath("status-9.json"),
            srcdir.joinpath("status-10.json"),
        ]
    )

    def serve_next_status(req):
        # Callback contract: (status code, headers dict, body text).
        return 200, {}, next(status_paths).read_text()

    with responses.RequestsMock() as rsps:
        rsps.add(
            "POST",
            wb.url_for_savepage(target_url),
            body=submit_resptext,
            status=200,
            match=[
                responses.urlencoded_params_matcher(
                    {"url": target_url, "capture_all": "on"}
                )
            ],
        )

        rsps.add_callback(
            "GET",
            expected_job_url,
            callback=serve_next_status,
        )

        yield rsps
def test_url_for_jobstatus():
    """The job-status URL is the save/status endpoint followed by the job id."""
    expected = "http://web.archive.org/save/status/abc-789"
    assert wb.url_for_jobstatus("abc-789") == expected
def test_snapshot_successful(success_status_paths):
    """Drive ``wb.snapshot`` through a complete, successful save-and-poll cycle.

    *success_status_paths* is an iterator of objects with ``read_text()``;
    one item is consumed per mocked job-status poll, and the test asserts the
    iterator is exhausted once ``wb.snapshot`` returns.

    NOTE(review): responses are registered on the module-level ``responses``
    mock, so this presumably runs under ``@responses.activate`` -- the
    decorator is not visible in this excerpt.
    """
    # ---- fixture setup (todo: refactor?)
    fixture_dir = FIXTURES_DIR.joinpath("job-save-success")

    url = "https://plainlanguage.gov/"
    save_endpoint = wb.url_for_savepage(url)

    submit_html = fixture_dir.joinpath("submit-response.html").read_text()
    job_id = wb.extract_job_id(submit_html)
    job_status_url = wb.url_for_jobstatus(job_id)

    # ---- mock responses
    form_matcher = responses.urlencoded_params_matcher(
        {"url": url, "capture_all": "on"}
    )
    responses.add(
        "POST", save_endpoint, body=submit_html, status=200, match=[form_matcher]
    )

    def serve_next_status(req):
        # Callback contract: (status code, headers dict, body text).
        return 200, {}, next(success_status_paths).read_text()

    responses.add_callback("GET", job_status_url, callback=serve_next_status)

    answer, meta = wb.snapshot(url, user_agent="guy incognito", poll_interval=0)

    # snapshot must have polled exactly as many times as there were status files
    leftover = next(success_status_paths, False)
    assert leftover is False

    # ---- return values
    assert type(answer) is str
    assert type(meta) is wb.TaskMeta
    assert meta.subcommand == "snapshot"
    assert meta.target_url == url
    created_stamp = meta.created_at.strftime("%Y-%m-%d %H:%M:%S%z")
    assert created_stamp == "2020-09-01 14:30:55+0000"

    data = meta.to_dict()

    # the answer is the snapshot URL built from the server-reported timestamp
    expected_snapshot_url = (
        wb.BASE_DOMAIN + "/web/" + data["server_payload"]["timestamp"] + "/" + url
    )
    assert answer == expected_snapshot_url

    # ---- serialized task metadata
    assert data["subcommand"] == "snapshot"
    assert data["was_new_snapshot_created"] is True
    assert data["snapshot_url"] == answer
    assert data["request_meta"]["user_agent"] == "guy incognito"

    flagged = data["issues"]
    assert flagged["too_soon"] is False
    assert flagged["too_many_during_period"] is False

    server_payload = data["server_payload"]
    assert server_payload["status"] == "success"
    assert server_payload["timestamp"] in data["snapshot_url"]

    # not sure if this is always the case...what happens if there's a redirect?
    assert server_payload["original_url"] == url