コード例 #1
0
def test_request_fingerprint_splash():
    """Compare plain and Splash-aware fingerprints for requests to one URL.

    The plain ``request_fingerprint`` must be equal for a bare request and
    one carrying ``meta["splash"]``.  ``splash_request_fingerprint`` must
    differ between the bare request and every request with a ``"splash"``
    entry, differ between different ``"splash"`` args, and be equal when
    only a non-``"splash"`` meta key differs.
    """
    url = "http://example.com"

    def splash_meta(**args):
        # A fresh dict per call, so no nested dict is shared between requests.
        return {"splash": {"args": args}}

    r1 = scrapy.Request(url)
    r2 = scrapy.Request(url, meta=splash_meta(html=1))
    r3 = scrapy.Request(url, meta=splash_meta(png=1))
    r4 = scrapy.Request(url, meta=dict(foo="bar", **splash_meta(html=1)))
    r5 = scrapy.Request(url, meta=splash_meta(html=1, wait=1.0))

    # The stock fingerprint does not distinguish r1 from r2.
    assert request_fingerprint(r1) == request_fingerprint(r2)

    # The Splash-aware fingerprint separates the bare request from each
    # request that carries "splash" meta.
    for other in (r2, r3, r4, r5):
        assert splash_request_fingerprint(r1) != splash_request_fingerprint(other)

    # Different "splash" args give different fingerprints.
    assert splash_request_fingerprint(r2) != splash_request_fingerprint(r3)

    # Only the contents of "splash" are taken into account.
    assert splash_request_fingerprint(r2) == splash_request_fingerprint(r4)
コード例 #2
0
def test_request_fingerprint_splash():
    """Verify how ``meta["splash"]`` affects the two fingerprint functions.

    ``request_fingerprint`` is asserted equal for a bare request and a
    request with ``"splash"`` meta, while ``splash_request_fingerprint`` is
    asserted to differ whenever the ``"splash"`` entry differs and to be
    equal when only an unrelated meta key (``"foo"``) is added.
    """
    url = "http://example.com"

    bare = scrapy.Request(url)
    html_only = scrapy.Request(url, meta={"splash": {"args": {"html": 1}}})
    png_only = scrapy.Request(url, meta={"splash": {"args": {"png": 1}}})
    html_plus_foo = scrapy.Request(
        url,
        meta={"foo": "bar", "splash": {"args": {"html": 1}}},
    )
    html_and_wait = scrapy.Request(
        url,
        meta={"splash": {"args": {"html": 1, "wait": 1.0}}},
    )

    assert request_fingerprint(bare) == request_fingerprint(html_only)

    # The bare request must not collide with any Splash request.
    splash_requests = (html_only, png_only, html_plus_foo, html_and_wait)
    for splash_request in splash_requests:
        assert (splash_request_fingerprint(bare)
                != splash_request_fingerprint(splash_request))

    assert (splash_request_fingerprint(html_only)
            != splash_request_fingerprint(png_only))

    # Only the contents of "splash" are taken into account.
    assert (splash_request_fingerprint(html_only)
            == splash_request_fingerprint(html_plus_foo))
コード例 #3
0
def test_request_fingerprint_prerender():
    """Check fingerprint matching for requests with ``meta["prerender"]``.

    The plain ``request_fingerprint`` is asserted equal for a bare request
    and one with ``"prerender"`` meta.  The ``assert_fingerprints_*``
    helpers are then used to require a mismatch whenever the
    ``"prerender"`` entry differs and a match when only an unrelated meta
    key (``"foo"``) is added.
    """
    url = "http://example.com"

    def prerender_meta(**args):
        # A fresh dict per call, so no nested dict is shared between requests.
        return {"prerender": {"args": args}}

    r1 = scrapy.Request(url)
    r2 = scrapy.Request(url, meta=prerender_meta(html=1))
    r3 = scrapy.Request(url, meta=prerender_meta(png=1))
    r4 = scrapy.Request(url, meta=dict(foo="bar", **prerender_meta(html=1)))
    r5 = scrapy.Request(url, meta=prerender_meta(html=1, wait=1.0))

    assert request_fingerprint(r1) == request_fingerprint(r2)

    # The bare request must not match any request carrying "prerender" meta.
    for other in (r2, r3, r4, r5):
        assert_fingerprints_dont_match(r1, other)
    assert_fingerprints_dont_match(r2, r3)

    # Only the contents of "prerender" are taken into account.
    assert_fingerprints_match(r2, r4)
コード例 #4
0
    def request_fingerprint(self, request):
        """Return the fingerprint for the given request.

        Parameters
        ----------
        request : scrapy.http.Request

        Returns
        -------
        str

        """
        # Delegates to the module-level ``request_fingerprint`` callable.
        # NOTE(review): the original comment said splash_request_fingerprint
        # detects whether a request looks like a SplashRequest, handles it
        # specially if so, and otherwise behaves like the ordinary
        # request_fingerprint. Presumably the name called here is imported
        # as an alias of splash_request_fingerprint -- confirm against the
        # file's imports.
        fingerprint = request_fingerprint(request)
        return fingerprint
コード例 #5
0
def test_request_fingerprint_nosplash():
    """Check that requests without ``"splash"`` meta fingerprint identically.

    For a bare request and one with only ``meta={"foo": "bar"}``, the plain
    and Splash-aware fingerprints are all asserted equal to the plain
    fingerprint of the bare request.
    """
    url = "http://example.com"
    bare = scrapy.Request(url)
    with_foo = scrapy.Request(url, meta={"foo": "bar"})

    # The Splash-aware fingerprint of a bare request equals the plain one.
    assert request_fingerprint(bare) == splash_request_fingerprint(bare)
    # Unrelated meta does not change the plain fingerprint...
    assert request_fingerprint(bare) == request_fingerprint(with_foo)
    # ...nor the Splash-aware fingerprint.
    assert request_fingerprint(bare) == splash_request_fingerprint(with_foo)
コード例 #6
0
def test_request_fingerprint_nosplash():
    """Requests lacking ``"splash"`` meta share the plain fingerprint.

    Builds a request with no meta and one with ``meta={"foo": "bar"}``, then
    asserts that ``request_fingerprint`` and ``splash_request_fingerprint``
    of both equal ``request_fingerprint`` of the no-meta request.
    """
    target = "http://example.com"
    no_meta = scrapy.Request(target)
    foo_meta = scrapy.Request(target, meta=dict(foo="bar"))

    assert request_fingerprint(no_meta) == splash_request_fingerprint(no_meta)
    assert request_fingerprint(no_meta) == request_fingerprint(foo_meta)
    assert request_fingerprint(no_meta) == splash_request_fingerprint(foo_meta)
コード例 #7
0
def test_request_fingerprint_noprerender():
    """Check that requests without ``"prerender"`` meta fingerprint identically.

    For a bare request and one with only ``meta={"foo": "bar"}``, the plain
    and prerender-aware fingerprints are all asserted equal to the plain
    fingerprint of the bare request.
    """
    url = "http://example.com"
    bare = scrapy.Request(url)
    with_foo = scrapy.Request(url, meta={"foo": "bar"})

    # The prerender-aware fingerprint of a bare request equals the plain one.
    assert request_fingerprint(bare) == prerender_request_fingerprint(bare)
    # Unrelated meta does not change the plain fingerprint...
    assert request_fingerprint(bare) == request_fingerprint(with_foo)
    # ...nor the prerender-aware fingerprint.
    assert request_fingerprint(bare) == prerender_request_fingerprint(with_foo)