def test_abort_on_unchanged_court_website(self):
    """Check abort_by_url_hash when the stored hash equals the site's hash.

    For each checker in self.dup_checkers, a UrlHash built from the site's
    URL and hash is saved first; abort_by_url_hash is then called with that
    same URL and hash. A checker with full_crawl set must not abort, while
    any other checker must abort.
    """
    scraped_site = test_opinion_scraper.Site()
    scraped_site.hash = 'this is a dummy hash code string'
    for checker in self.dup_checkers:
        stored_hash = UrlHash(id=scraped_site.url, sha1=scraped_site.hash)
        stored_hash.save()
        should_abort = checker.abort_by_url_hash(
            scraped_site.url, scraped_site.hash)
        if not checker.full_crawl:
            self.assertTrue(
                should_abort,
                "DupChecker says not to abort on a court that's been "
                "crawled before with the same hash")
        else:
            self.assertFalse(
                should_abort,
                "DupChecker says to abort during a full crawl.")
        # Remove the record held on the checker before the next iteration.
        checker.url_hash.delete()
def test_abort_on_changed_court_website(self):
    """Check abort_by_url_hash when the stored hash differs from the new one.

    For each checker in self.dup_checkers, a UrlHash built from the site's
    URL and hash is saved first; abort_by_url_hash is then called with the
    same URL but a different hash string. No checker is expected to abort,
    whether or not full_crawl is set; only the failure message differs.
    """
    scraped_site = test_opinion_scraper.Site()
    scraped_site.hash = 'this is a dummy hash code string'
    for checker in self.dup_checkers:
        stored_hash = UrlHash(pk=scraped_site.url, sha1=scraped_site.hash)
        stored_hash.save()
        should_abort = checker.abort_by_url_hash(
            scraped_site.url, "this is a *different* hash!")
        # Both outcomes assert "no abort"; pick the message by crawl mode.
        failure_message = (
            "DupChecker says to abort during a full crawl."
            if checker.full_crawl
            else "DupChecker says to abort on a court where the hash has "
                 "changed."
        )
        self.assertFalse(should_abort, failure_message)
        # Remove the record held on the checker before the next iteration.
        checker.url_hash.delete()