def test_abort_on_changed_court_website(self):
    """Check that a changed page hash never produces an abort.

    A urlToHash object holding a dummy hash is saved for the site's URL,
    then every dup checker is asked about that URL with a different hash.
    The answer must be falsy whether or not the checker is in full-crawl
    mode.
    """
    court_site = test_scraper.Site()
    court_site.hash = 'this is a dummy hash code string'
    for checker in self.dup_checkers:
        stored_hash = urlToHash(url=court_site.url, SHA1=court_site.hash)
        stored_hash.save()
        should_abort = checker.abort_by_url_hash(
            court_site.url, "this is a *different* hash!")
        # Both modes expect no abort; only the failure message differs.
        if checker.full_crawl:
            failure_msg = "DupChecker says to abort during a full crawl."
        else:
            failure_msg = ("DupChecker says to abort on a court where the "
                           "hash has changed.")
        self.assertFalse(should_abort, failure_msg)
        # Delete the checker's url2Hash object before the next iteration
        # (presumably the record saved above -- confirm against DupChecker).
        checker.url2Hash.delete()
def test_abort_on_unchanged_court_website(self):
    """Check the abort decision when the stored hash matches the new one.

    A urlToHash object is saved for the site's URL with the site's hash,
    then every dup checker is asked about that same URL and hash. A
    checker in full-crawl mode must not abort; any other checker must.
    """
    court_site = test_scraper.Site()
    court_site.hash = 'this is a dummy hash code string'
    for checker in self.dup_checkers:
        stored_hash = urlToHash(url=court_site.url, SHA1=court_site.hash)
        stored_hash.save()
        should_abort = checker.abort_by_url_hash(
            court_site.url, court_site.hash)
        if not checker.full_crawl:
            self.assertTrue(
                should_abort,
                "DupChecker says not to abort on a court that's been "
                "crawled before with the same hash")
        else:
            self.assertFalse(
                should_abort,
                "DupChecker says to abort during a full crawl.")
        # Delete the checker's url2Hash object before the next iteration
        # (presumably the record saved above -- confirm against DupChecker).
        checker.url2Hash.delete()
def test_abort_on_changed_court_website(self):
    """Check that a changed page hash never produces an abort.

    A urlToHash object keyed (via ``pk``) on the site's URL is saved with
    a dummy hash, then every dup checker is asked about that URL with a
    different hash. The answer must be falsy in both full-crawl and
    regular mode.
    """
    opinion_site = test_opinion_scraper.Site()
    opinion_site.hash = 'this is a dummy hash code string'
    for checker in self.dup_checkers:
        urlToHash(pk=opinion_site.url, SHA1=opinion_site.hash).save()
        abort_flag = checker.abort_by_url_hash(
            opinion_site.url, "this is a *different* hash!")
        # The expectation is identical for both modes; pick the message
        # that names the mode under test.
        message = (
            "DupChecker says to abort during a full crawl."
            if checker.full_crawl else
            "DupChecker says to abort on a court where the hash has changed."
        )
        self.assertFalse(abort_flag, message)
        # Delete the checker's url2Hash object before the next iteration
        # (presumably the record saved above -- confirm against DupChecker).
        checker.url2Hash.delete()
def test_abort_on_unchanged_court_website(self):
    """Check the abort decision when the stored hash matches the new one.

    A urlToHash object keyed (via ``id``) on the site's URL is saved with
    the site's hash, then every dup checker is asked about that same URL
    and hash. A full-crawl checker must not abort; any other checker
    must.
    """
    opinion_site = test_opinion_scraper.Site()
    opinion_site.hash = 'this is a dummy hash code string'
    for checker in self.dup_checkers:
        hash_record = urlToHash(id=opinion_site.url, SHA1=opinion_site.hash)
        hash_record.save()
        abort_flag = checker.abort_by_url_hash(
            opinion_site.url, opinion_site.hash)
        if not checker.full_crawl:
            same_hash_msg = ("DupChecker says not to abort on a court "
                             "that's been crawled before with the same hash")
            self.assertTrue(abort_flag, same_hash_msg)
        else:
            self.assertFalse(
                abort_flag, "DupChecker says to abort during a full crawl.")
        # Delete the checker's url2Hash object before the next iteration
        # (presumably the record saved above -- confirm against DupChecker).
        checker.url2Hash.delete()