# Example no. 1
# 0
def test_log_formatter_scrapy_1():
    """The log formatter's 'crawled' message should reference the original
    (pre-middleware) request and include its original url."""
    middleware = get_test_middleware()
    logformatter = CrawleraFetchLogFormatter()
    formatter = Formatter()

    for case in get_test_requests():
        original = case["original"]
        response = Response(original.url)
        processed = middleware.process_request(original, foo_spider)

        meta = original.meta.get("crawlera_fetch") or {}
        if meta.get("skip"):
            # skipped requests are not processed at all
            assert processed is None
            continue

        # crawled
        crawled_result = logformatter.crawled(processed, response, foo_spider)
        assert crawled_result["args"]["request"] == str(original)
        log_record = LogRecord(
            name="logger",
            pathname="n/a",
            lineno=1,
            exc_info=None,
            **crawled_result,
        )
        rendered = formatter.format(log_record)
        assert rendered == "Crawled (200) {} ['original url: {}'] (referer: None)".format(
            original, original.url
        )
def test_process_request_single_download_slot():
    """With the 'Single' download-slot policy, every processed request must be
    assigned the shared '__crawlera_fetch__' download slot."""
    middleware = get_test_middleware(
        settings={
            "CRAWLERA_FETCH_DOWNLOAD_SLOT_POLICY": DownloadSlotPolicy.Single
        })

    for case in get_test_requests():
        original = case["original"]
        expected = case["expected"]
        if expected:
            # the single-slot policy pins all requests to one shared slot
            expected.meta["download_slot"] = "__crawlera_fetch__"

        with shub_jobkey_env_variable():
            processed = middleware.process_request(original, foo_spider)

        # `or {}` guards against a missing "crawlera_fetch" key (meta.get
        # returns None), consistent with the other tests in this module
        crawlera_meta = original.meta.get("crawlera_fetch") or {}
        if crawlera_meta.get("skip"):
            assert processed is None
        else:
            assert type(processed) is type(expected)
            assert processed.url == expected.url
            assert processed.method == expected.method
            assert processed.headers == expected.headers
            assert processed.meta == expected.meta
            processed_text = processed.body.decode(processed.encoding)
            expected_text = expected.body.decode(expected.encoding)
            assert json.loads(processed_text) == json.loads(expected_text)
def test_process_request_disabled():
    """When the middleware is disabled it must leave every request untouched,
    i.e. process_request always returns None."""
    middleware = get_test_middleware(settings={"CRAWLERA_FETCH_ENABLED": False})

    for case in get_test_requests():
        with shub_jobkey_env_variable():
            result = middleware.process_request(case["original"], foo_spider)
        assert result is None
# Example no. 4
# 0
def test_log_formatter_scrapy_2():
    """The log formatter should reference the original (pre-middleware)
    request in crawled, spider_error and download_error log messages."""
    middleware = get_test_middleware()
    logformatter = CrawleraFetchLogFormatter()
    formatter = Formatter()

    def render(result, lineno):
        # Turn a logformatter result dict into the final formatted log line.
        record = LogRecord(
            name="logger",
            pathname="n/a",
            lineno=lineno,
            exc_info=None,
            **result,
        )
        return formatter.format(record)

    for case in get_test_requests():
        original = case["original"]
        response = Response(original.url)
        processed = middleware.process_request(original, foo_spider)

        meta = original.meta.get("crawlera_fetch") or {}
        if meta.get("skip"):
            # skipped requests are not processed at all
            assert processed is None
            continue

        # crawled
        result = logformatter.crawled(processed, response, foo_spider)
        assert result["args"]["request"] == str(original)
        assert render(result, 1) == "Crawled (200) %s (referer: None)" % str(original)

        # spider_error
        result = logformatter.spider_error(
            Failure(Exception("exc")), processed, response, foo_spider
        )
        assert result["args"]["request"] == str(original)
        assert render(result, 1) == "Spider error processing %s (referer: None)" % str(
            original
        )

        # download_error
        result = logformatter.download_error(
            Failure(Exception("exc")), processed, foo_spider, "error"
        )
        assert result["args"]["request"] == str(original)
        assert render(result, 2) == "Error downloading %s: error" % str(original)
# Example no. 5
# 0
def test_process_request_default_args():
    """Entries from CRAWLERA_FETCH_DEFAULT_ARGS must be merged into the JSON
    body of every processed (non-skipped) request."""
    middleware = get_test_middleware(
        settings={"CRAWLERA_FETCH_DEFAULT_ARGS": {"foo": "bar", "answer": "42"}}
    )

    for case in get_test_requests():
        original = case["original"]
        processed = middleware.process_request(original, foo_spider)

        # `or {}` guards against a missing "crawlera_fetch" key (meta.get
        # returns None), consistent with the other tests in this module
        crawlera_meta = original.meta.get("crawlera_fetch") or {}
        if crawlera_meta.get("skip"):
            assert processed is None
        else:
            processed_text = processed.body.decode(processed.encoding)
            processed_json = json.loads(processed_text)
            assert processed_json["foo"] == "bar"
            assert processed_json["answer"] == "42"
def test_process_request():
    """process_request should transform each request into the expected
    Crawlera-Fetch request (url, method, headers, meta and JSON body), and
    return None for requests marked as skipped."""
    middleware = get_test_middleware()

    for case in get_test_requests():
        original = case["original"]
        expected = case["expected"]

        with shub_jobkey_env_variable():
            processed = middleware.process_request(original, foo_spider)

        # `or {}` guards against a missing "crawlera_fetch" key (meta.get
        # returns None), consistent with the other tests in this module
        crawlera_meta = original.meta.get("crawlera_fetch") or {}
        if crawlera_meta.get("skip"):
            assert processed is None
        else:
            assert type(processed) is type(expected)
            assert processed.url == expected.url
            assert processed.method == expected.method
            assert processed.headers == expected.headers
            assert processed.meta == expected.meta
            processed_text = processed.body.decode(processed.encoding)
            expected_text = expected.body.decode(expected.encoding)
            assert json.loads(processed_text) == json.loads(expected_text)