def assert_filtered(r1, r2):
    """Assert that ``r1`` and ``r2`` are filtered as duplicates.

    A request counts as filtered when it is filtered either before
    rescheduling or after. The comparison of the processed requests is
    therefore only made when the direct fingerprints differ.
    """
    if splash_request_fingerprint(r1) != splash_request_fingerprint(r2):
        # The raw fingerprints disagree, so the requests have to match
        # once both have been passed through splash_mw_process.
        processed_first = splash_mw_process(r1)
        processed_second = splash_mw_process(r2)
        assert_fingerprints_match(processed_first, processed_second)
 def assert_filtered(r1, r2):
     """Assert that ``r1`` and ``r2`` are filtered as duplicates.

     A request counts as filtered when it is filtered either before
     rescheduling or after. The comparison of the processed requests is
     therefore only made when the direct fingerprints differ.
     """
     if splash_request_fingerprint(r1) != splash_request_fingerprint(r2):
         # The raw fingerprints disagree, so the requests have to match
         # once both have been passed through splash_mw_process.
         processed_first = splash_mw_process(r1)
         processed_second = splash_mw_process(r2)
         assert_fingerprints_match(processed_first, processed_second)
Esempio n. 3
0
    def request_seen(self, request):
        """Report whether ``request`` was seen before, recording it if not.

        The fingerprint produced by ``splash_request_fingerprint`` is looked
        up in ``self.bf`` (a BloomFilter object). A fingerprint that is not
        found is inserted before ``False`` is returned.
        """
        fingerprint = splash_request_fingerprint(request)
        if not self.bf.exists(fingerprint):
            # First sighting: remember the fingerprint for later lookups.
            self.bf.insert(fingerprint)
            return False
        return True
 def request_fingerprint(self, request):
     """Delegate fingerprinting of ``request`` to ``splash_request_fingerprint``."""
     fingerprint = splash_request_fingerprint(request)
     return fingerprint
Esempio n. 5
0
 def request_fingerprint(self, request):
     """Delegate fingerprinting of ``request`` to ``splash_request_fingerprint``."""
     fingerprint = splash_request_fingerprint(request)
     return fingerprint
def assert_fingerprints_dont_match(r1, r2):
    """Assert that ``r1`` and ``r2`` have different splash fingerprints."""
    fp1 = splash_request_fingerprint(r1)
    fp2 = splash_request_fingerprint(r2)
    assert fp1 != fp2
def test_request_fingerprint_nosplash():
    """Check fingerprints of requests that carry no "splash" meta key."""
    plain = scrapy.Request("http://example.com")
    with_meta = scrapy.Request("http://example.com", meta={"foo": "bar"})

    # For the plain request both fingerprint functions must agree.
    assert request_fingerprint(plain) == splash_request_fingerprint(plain)
    # Unrelated meta must not change the result of either function.
    assert request_fingerprint(plain) == request_fingerprint(with_meta)
    assert request_fingerprint(plain) == splash_request_fingerprint(with_meta)
def assert_fingerprints_dont_match(r1, r2):
    """Assert that ``r1`` and ``r2`` have different splash fingerprints."""
    fp1 = splash_request_fingerprint(r1)
    fp2 = splash_request_fingerprint(r2)
    assert fp1 != fp2
def test_request_fingerprint_nosplash():
    """Check fingerprints of requests that carry no "splash" meta key."""
    plain = scrapy.Request("http://example.com")
    with_meta = scrapy.Request("http://example.com", meta={"foo": "bar"})

    # For the plain request both fingerprint functions must agree.
    assert request_fingerprint(plain) == splash_request_fingerprint(plain)
    # Unrelated meta must not change the result of either function.
    assert request_fingerprint(plain) == request_fingerprint(with_meta)
    assert request_fingerprint(plain) == splash_request_fingerprint(with_meta)
def test_request_fingerprint_splash():
    """Check how the "splash" meta key affects request fingerprints."""
    url = "http://example.com"
    plain = scrapy.Request(url)
    html = scrapy.Request(url, meta={"splash": {"args": {"html": 1}}})
    png = scrapy.Request(url, meta={"splash": {"args": {"png": 1}}})
    html_extra_meta = scrapy.Request(
        url,
        meta={"foo": "bar", "splash": {"args": {"html": 1}}},
    )
    html_with_wait = scrapy.Request(
        url,
        meta={"splash": {"args": {"html": 1, "wait": 1.0}}},
    )

    # request_fingerprint must give the same value with or without
    # splash meta.
    assert request_fingerprint(plain) == request_fingerprint(html)

    # splash_request_fingerprint must tell a plain request apart from every
    # splash request, and splash requests with different args from each other.
    assert splash_request_fingerprint(plain) != splash_request_fingerprint(html)
    assert splash_request_fingerprint(plain) != splash_request_fingerprint(png)
    assert splash_request_fingerprint(plain) != splash_request_fingerprint(html_extra_meta)
    assert splash_request_fingerprint(plain) != splash_request_fingerprint(html_with_wait)
    assert splash_request_fingerprint(html) != splash_request_fingerprint(png)

    # Only the contents of "splash" are taken into account; other meta
    # keys must not affect the fingerprint.
    assert splash_request_fingerprint(html) == splash_request_fingerprint(html_extra_meta)