def test_no_variation_is_same_as_key(self):
    """Check that no variation is spelled exactly like one of its own keys.

    VARIATIONS_ONLY is walked as a mapping of variation -> iterable of
    keys; every (variation, key) pair must compare unequal.
    """
    # Flatten the mapping lazily so each pair is checked in the same order
    # a nested loop over the mapping would visit it.
    pairs = (
        (variation, key)
        for variation, keys in VARIATIONS_ONLY.items()
        for key in keys
    )
    for variation, key in pairs:
        self.assertNotEqual(
            variation,
            key,
            "The variation '%s' is identical to the key it's supposed "
            "to be a variation of." % variation,
        )
def test_no_variation_is_same_as_key(self):
    """Fail if any variation is identical to a key it is listed under.

    Iterates VARIATIONS_ONLY (variation -> keys) and asserts that the
    variation differs from each of the keys it points to.
    """
    for variant, canonical_keys in VARIATIONS_ONLY.items():
        for canonical in canonical_keys:
            # Build the message per comparison, naming the offending variation.
            message = (
                "The variation '%s' is identical to the key it's supposed "
                "to be a variation of." % variant
            )
            self.assertNotEqual(variant, canonical, message)
def test_extract_citations(case_factory, tmpdir, settings, elasticsearch):
    """Check which citations are extracted from a case body.

    Builds one case whose text mixes citations that should be extracted
    with ones that should not, runs ``fabfile.extract_all_citations()``,
    and compares the stored ExtractedCitation rows with the expectation.
    """
    from scripts.extract_cites import EDITIONS as processed_editions

    settings.MISSED_CITATIONS_DIR = str(tmpdir)

    # Reporter names whose every processed edition has a start_year after
    # 2000; the case below is dated 2000-01-01, and these are expected not
    # to be extracted.
    blocked_by_date = set()
    for reporter in list(EDITIONS.keys()) + list(VARIATIONS_ONLY.keys()):
        editions = processed_editions[reporter]
        if all(edition['start_year'] > 2000 for edition in editions):
            blocked_by_date.add(reporter)

    # Each spec is either a plain string (text in the case body, which is
    # also the form expected after extraction) or a two-item list of
    # [text in the case body, form passed to normalize_cite].
    cite_specs = [
        "225 F. Supp. 552",  # correct
        ["125 f supp 152", "125 F. Supp. 152"],  # normalized
        ["125 Burnett (Wis.) 152", "125 Bur. 152"],  # normalized
        ["1 F. 2d 2", "1 F.2d 2"],  # not matched as "1 F. 2"
        "2 1/2 Mass. 1",  # special volume numbers
        "3 Suppl. Mass. 2",  # special volume numbers
        "1 La.App. 5 Cir. 2",  # not matched as "1 La.App. 5"
        "2000 WL 12345",  # vendor cite
    ]
    for reporter in EDITIONS.keys():
        if reporter not in blocked_by_date:
            cite_specs.append("1 %s 1" % reporter)
    for variation, targets in VARIATIONS_ONLY.items():
        for target in targets:
            if variation not in blocked_by_date:
                cite_specs.append(["1 %s 1" % variation, "1 %s 1" % target])

    # Split the specs into the raw text that goes into the case body and
    # the set of normalized forms expected after extraction.
    legitimate_cites = []
    legitimate_cites_normalized = set()
    for spec in cite_specs:
        if isinstance(spec, str):
            body_text = expected_form = spec
        else:
            body_text, expected_form = spec
        legitimate_cites.append(body_text)
        legitimate_cites_normalized.add(normalize_cite(expected_form))

    illegitimate_cites = [
        "2 Dogs 3",  # unrecognized reporter
        "3 Dogs 4",  # duplicate unrecognized reporter
        "1 or 2",  # not matched as 1 Or. 2
        "word1 Mass. 2word",  # not matched if part of larger word
        "1 Mass.\n 2",  # no match across newlines
        "1 A.3d 1",  # no match to reporter that started publishing in 2010
    ]
    for reporter in blocked_by_date:
        illegitimate_cites.append("1 %s 1" % reporter)

    body_text = ", some text, ".join(legitimate_cites + illegitimate_cites)
    case = case_factory(
        body_cache__text=body_text,
        decision_date=datetime(2000, 1, 1),
    )
    fabfile.extract_all_citations()
    update_elasticsearch_from_queue()

    # check extracted cites
    cites = list(ExtractedCitation.objects.all())
    cite_set = {extracted.cite for extracted in cites}
    normalized_cite_set = {extracted.normalized_cite for extracted in cites}
    assert cite_set == set(legitimate_cites)
    assert normalized_cite_set == legitimate_cites_normalized
    assert all(extracted.cited_by_id == case.pk for extracted in cites)