def test_request_fingerprint_splash():
    """Check how ``meta["splash"]`` affects the two fingerprint functions.

    ``request_fingerprint`` must give the same value with and without the
    ``splash`` meta key, whereas ``splash_request_fingerprint`` must tell
    apart requests whose ``splash`` contents differ and must disregard
    unrelated ``meta`` keys.
    """
    url = "http://example.com"
    plain = scrapy.Request(url)
    html = scrapy.Request(url, meta={"splash": {"args": {"html": 1}}})
    png = scrapy.Request(url, meta={"splash": {"args": {"png": 1}}})
    html_with_foo = scrapy.Request(
        url,
        meta={"foo": "bar", "splash": {"args": {"html": 1}}},
    )
    html_with_wait = scrapy.Request(
        url,
        meta={"splash": {"args": {"html": 1, "wait": 1.0}}},
    )

    # request_fingerprint is unaffected by the "splash" meta key.
    assert request_fingerprint(plain) == request_fingerprint(html)

    # Every request carrying "splash" meta differs from the plain request.
    for with_splash in (html, png, html_with_foo, html_with_wait):
        assert (
            splash_request_fingerprint(plain)
            != splash_request_fingerprint(with_splash)
        )

    # Different splash args give different fingerprints.
    assert splash_request_fingerprint(html) != splash_request_fingerprint(png)

    # only "splash" contents is taken into account
    assert (
        splash_request_fingerprint(html)
        == splash_request_fingerprint(html_with_foo)
    )
def test_request_fingerprint_splash():
    """Verify fingerprinting of requests with and without ``splash`` meta.

    Asserts that ``request_fingerprint`` is the same regardless of the
    ``splash`` meta key, that ``splash_request_fingerprint`` differs whenever
    the ``splash`` contents differ, and that meta keys other than ``splash``
    do not change the result of ``splash_request_fingerprint``.
    """
    url = "http://example.com"

    def splash_request(args, **other_meta):
        # Build a fresh meta dict per request so nothing is shared between
        # requests; unrelated keys come first, then the "splash" entry.
        meta = dict(other_meta)
        meta["splash"] = {"args": args}
        return scrapy.Request(url, meta=meta)

    bare = scrapy.Request(url)
    html = splash_request({"html": 1})
    png = splash_request({"png": 1})
    html_foo = splash_request({"html": 1}, foo="bar")
    html_wait = splash_request({"html": 1, "wait": 1.0})

    assert request_fingerprint(bare) == request_fingerprint(html)

    # Pairs whose splash-aware fingerprints must not collide.
    must_differ = [
        (bare, html),
        (bare, png),
        (bare, html_foo),
        (bare, html_wait),
        (html, png),
    ]
    for left, right in must_differ:
        assert (
            splash_request_fingerprint(left)
            != splash_request_fingerprint(right)
        )

    # only "splash" contents is taken into account
    assert splash_request_fingerprint(html) == splash_request_fingerprint(html_foo)
def test_request_fingerprint_prerender():
    """Check how ``meta["prerender"]`` affects request fingerprints.

    ``request_fingerprint`` must give the same value with and without the
    ``prerender`` meta key. The ``assert_fingerprints_dont_match`` and
    ``assert_fingerprints_match`` helpers are then used to check that
    requests with different ``prerender`` contents are told apart and that
    unrelated ``meta`` keys are disregarded.
    """
    url = "http://example.com"
    plain = scrapy.Request(url)
    html = scrapy.Request(url, meta={"prerender": {"args": {"html": 1}}})
    png = scrapy.Request(url, meta={"prerender": {"args": {"png": 1}}})
    html_with_foo = scrapy.Request(
        url,
        meta={"foo": "bar", "prerender": {"args": {"html": 1}}},
    )
    html_with_wait = scrapy.Request(
        url,
        meta={"prerender": {"args": {"html": 1, "wait": 1.0}}},
    )

    # request_fingerprint is unaffected by the "prerender" meta key.
    assert request_fingerprint(plain) == request_fingerprint(html)

    mismatching_pairs = (
        (plain, html),
        (plain, png),
        (plain, html_with_foo),
        (plain, html_with_wait),
        (html, png),
    )
    for first, second in mismatching_pairs:
        assert_fingerprints_dont_match(first, second)

    # only "prerender" contents is taken into account
    assert_fingerprints_match(html, html_with_foo)
def request_fingerprint(self, request):
    """Compute the fingerprint of the given request.

    Delegates to the module-level ``request_fingerprint`` callable.

    Parameters
    ----------
    request : scrapy.http.Request

    Returns
    -------
    str

    """
    # Translated from the original note: splash_request_fingerprint detects on
    # its own whether the request looks like a SplashRequest. If it does, the
    # request gets extra processing; otherwise the result is the same as the
    # ordinary request_fingerprint.
    # NOTE(review): the call below targets the module-level name
    # ``request_fingerprint``; presumably this file binds that name to
    # splash_request_fingerprint at import time -- confirm.
    fingerprint = request_fingerprint(request)
    return fingerprint
def test_request_fingerprint_nosplash():
    """Without ``splash`` meta both fingerprint functions must agree.

    Covers a request with no meta at all and a request whose meta only holds
    an unrelated key.
    """
    url = "http://example.com"
    no_meta = scrapy.Request(url)
    unrelated_meta = scrapy.Request(url, meta={"foo": "bar"})

    # Same request, both functions: identical fingerprint.
    assert request_fingerprint(no_meta) == splash_request_fingerprint(no_meta)

    # An unrelated meta key changes neither function's result.
    assert request_fingerprint(no_meta) == request_fingerprint(unrelated_meta)
    assert (
        request_fingerprint(no_meta)
        == splash_request_fingerprint(unrelated_meta)
    )
def test_request_fingerprint_noprerender():
    """Without ``prerender`` meta both fingerprint functions must agree.

    Compares ``request_fingerprint`` with ``prerender_request_fingerprint``
    for a request with no meta and for one with only an unrelated meta key.
    """
    target = "http://example.com"
    bare_request = scrapy.Request(target)
    foo_request = scrapy.Request(target, meta={"foo": "bar"})

    expected_matches = (
        (request_fingerprint, bare_request, prerender_request_fingerprint, bare_request),
        (request_fingerprint, bare_request, request_fingerprint, foo_request),
        (request_fingerprint, bare_request, prerender_request_fingerprint, foo_request),
    )
    # Each row reads: left_func(left_request) == right_func(right_request).
    for left_func, left_request, right_func, right_request in expected_matches:
        assert left_func(left_request) == right_func(right_request)