def assert_filtered(r1, r2):
    """Assert that ``r1`` and ``r2`` are filtered as duplicates of each other.

    A request is considered filtered if it is filtered either before
    rescheduling or after it. When ``splash_request_fingerprint`` already
    gives the same value for both requests nothing else is checked;
    otherwise the results of ``splash_mw_process`` for each request are
    handed to ``assert_fingerprints_match``.
    """
    fingerprints_differ = (
        splash_request_fingerprint(r1) != splash_request_fingerprint(r2)
    )
    if not fingerprints_differ:
        # Already equal before processing: nothing more to verify.
        return

    processed_first = splash_mw_process(r1)
    processed_second = splash_mw_process(r2)
    assert_fingerprints_match(processed_first, processed_second)
def assert_filtered(r1, r2):
    """Assert that ``r1`` and ``r2`` are filtered as duplicates of each other.

    A request is considered filtered if it is filtered either before
    rescheduling or after it. When ``splash_request_fingerprint`` already
    gives the same value for both requests nothing else is checked;
    otherwise the results of ``splash_mw_process`` for each request are
    handed to ``assert_fingerprints_match``.
    """
    fingerprints_differ = (
        splash_request_fingerprint(r1) != splash_request_fingerprint(r2)
    )
    if not fingerprints_differ:
        # Already equal before processing: nothing more to verify.
        return

    processed_first = splash_mw_process(r1)
    processed_second = splash_mw_process(r2)
    assert_fingerprints_match(processed_first, processed_second)
def request_seen(self, request):
    """Report whether ``request`` was already recorded, recording it if not.

    The value returned by ``splash_request_fingerprint`` is looked up in
    ``self.bf`` (an object of BloomFilter, per the original comment).

    Returns:
        True if ``self.bf.exists`` reports the fingerprint as present;
        False otherwise, after the fingerprint has been inserted.
    """
    fingerprint = splash_request_fingerprint(request)
    if not self.bf.exists(fingerprint):
        # Not present yet: store it for subsequent lookups.
        self.bf.insert(fingerprint)
        return False
    return True
def request_fingerprint(self, request):
    """Return what ``splash_request_fingerprint`` computes for ``request``."""
    fingerprint = splash_request_fingerprint(request)
    return fingerprint
def request_fingerprint(self, request):
    """Return what ``splash_request_fingerprint`` computes for ``request``."""
    fingerprint = splash_request_fingerprint(request)
    return fingerprint
def assert_fingerprints_dont_match(r1, r2):
    """Assert that ``splash_request_fingerprint`` differs for ``r1`` and ``r2``."""
    first_fingerprint = splash_request_fingerprint(r1)
    second_fingerprint = splash_request_fingerprint(r2)
    assert first_fingerprint != second_fingerprint
def test_request_fingerprint_nosplash():
    """Without "splash" meta, both fingerprint functions give the same value."""
    url = "http://example.com"
    plain = scrapy.Request(url)
    with_meta = scrapy.Request(url, meta={"foo": "bar"})

    # A request without meta gets the same value from both functions.
    assert request_fingerprint(plain) == splash_request_fingerprint(plain)

    # A "foo" meta key changes neither fingerprint.
    assert request_fingerprint(plain) == request_fingerprint(with_meta)
    assert request_fingerprint(plain) == splash_request_fingerprint(with_meta)
def assert_fingerprints_dont_match(r1, r2):
    """Assert that ``splash_request_fingerprint`` differs for ``r1`` and ``r2``."""
    first_fingerprint = splash_request_fingerprint(r1)
    second_fingerprint = splash_request_fingerprint(r2)
    assert first_fingerprint != second_fingerprint
def test_request_fingerprint_nosplash():
    """Without "splash" meta, both fingerprint functions give the same value."""
    url = "http://example.com"
    plain = scrapy.Request(url)
    with_meta = scrapy.Request(url, meta={"foo": "bar"})

    # A request without meta gets the same value from both functions.
    assert request_fingerprint(plain) == splash_request_fingerprint(plain)

    # A "foo" meta key changes neither fingerprint.
    assert request_fingerprint(plain) == request_fingerprint(with_meta)
    assert request_fingerprint(plain) == splash_request_fingerprint(with_meta)
def test_request_fingerprint_splash():
    """Check how "splash" meta affects ``splash_request_fingerprint``."""
    url = "http://example.com"
    plain = scrapy.Request(url)
    html = scrapy.Request(url, meta={"splash": {"args": {"html": 1}}})
    png = scrapy.Request(url, meta={"splash": {"args": {"png": 1}}})
    html_extra_meta = scrapy.Request(
        url, meta={"foo": "bar", "splash": {"args": {"html": 1}}}
    )
    html_wait = scrapy.Request(
        url, meta={"splash": {"args": {"html": 1, "wait": 1.0}}}
    )

    # request_fingerprint gives the same value with or without "splash" meta.
    assert request_fingerprint(plain) == request_fingerprint(html)

    # Every request carrying "splash" meta differs from the plain request.
    for splash_request in (html, png, html_extra_meta, html_wait):
        assert (
            splash_request_fingerprint(plain)
            != splash_request_fingerprint(splash_request)
        )

    # Different "splash" args give different fingerprints.
    assert splash_request_fingerprint(html) != splash_request_fingerprint(png)

    # Only the "splash" contents are taken into account.
    assert (
        splash_request_fingerprint(html)
        == splash_request_fingerprint(html_extra_meta)
    )