예제 #1
0
 def test_no_variation_is_same_as_key(self):
     """Verify that no variation duplicates a key it is mapped to.

     Iterates over every ``(variation, keys)`` pair in ``VARIATIONS_ONLY``
     and asserts that the variation string differs from each of its keys.
     """
     template = ("The variation '%s' is identical to the key it's supposed "
                 "to be a variation of.")
     for variant, canonical_keys in VARIATIONS_ONLY.items():
         for canonical in canonical_keys:
             # The failure message names the offending variation.
             self.assertNotEqual(variant, canonical, template % variant)
예제 #2
0
 def test_no_variation_is_same_as_key(self):
     """Check each variation against the keys it is listed under.

     A variation that is spelled exactly like one of its own keys in
     ``VARIATIONS_ONLY`` fails the test.
     """
     # Lazily flatten the mapping into (variation, key) pairs.
     pairs = (
         (variation, key)
         for variation, keys in VARIATIONS_ONLY.items()
         for key in keys
     )
     for variation, key in pairs:
         message = (
             "The variation '%s' is identical to the key it's supposed "
             "to be a variation of." % variation
         )
         self.assertNotEqual(variation, key, message)
예제 #3
0
def test_extract_citations(case_factory, tmpdir, settings, elasticsearch):
    """Run citation extraction over one synthetic case and check the result.

    A case dated 2000-01-01 is created whose body text mixes cites that
    should be extracted with cites that should be ignored. After
    ``fabfile.extract_all_citations()`` runs, the stored ``ExtractedCitation``
    rows must match exactly the expected cites (as written and normalized)
    and must all point back at the created case.

    ``elasticsearch`` is requested as a fixture but is not referenced
    directly in the body.
    """
    from scripts.extract_cites import EDITIONS as processed_editions

    settings.MISSED_CITATIONS_DIR = str(tmpdir)

    # Reporter strings for which every processed edition entry has a
    # start_year after 2000; these are expected NOT to match for this case.
    blocked_by_date = set()
    for reporter in list(EDITIONS.keys()) + list(VARIATIONS_ONLY.keys()):
        if all(ed['start_year'] > 2000 for ed in processed_editions[reporter]):
            blocked_by_date.add(reporter)

    # Each spec is either a plain cite, or a two-item list of
    # [text placed in the case body, text passed to normalize_cite].
    cite_specs = [
        "225 F. Supp. 552",  # correct
        ["125 f supp 152", "125 F. Supp. 152"],  # normalized
        ["125 Burnett (Wis.) 152", "125 Bur. 152"],  # normalized
        ["1 F. 2d 2", "1 F.2d 2"],  # not matched as "1 F. 2"
        "2 1/2 Mass. 1",  # special volume numbers
        "3 Suppl. Mass. 2",  # special volume numbers
        "1 La.App. 5 Cir. 2",  # not matched as "1 La.App. 5"
        "2000 WL 12345",  # vendor cite
    ]
    for edition in EDITIONS.keys():
        if edition not in blocked_by_date:
            cite_specs.append("1 %s 1" % edition)
    for variation, targets in VARIATIONS_ONLY.items():
        if variation in blocked_by_date:
            continue
        for target in targets:
            cite_specs.append(["1 %s 1" % variation, "1 %s 1" % target])

    # Split the specs into the cites as written and their normalized forms.
    legitimate_cites = []
    legitimate_cites_normalized = set()
    for spec in cite_specs:
        if isinstance(spec, str):
            as_written = to_normalize = spec
        else:
            as_written, to_normalize = spec[0], spec[1]
        legitimate_cites.append(as_written)
        legitimate_cites_normalized.add(normalize_cite(to_normalize))

    illegitimate_cites = [
        "2 Dogs 3",  # unrecognized reporter
        "3 Dogs 4",  # duplicate unrecognized reporter
        "1 or 2",  # not matched as 1 Or. 2
        "word1 Mass. 2word",  # not matched if part of larger word
        "1 Mass.\n 2",  # no match across newlines
        "1 A.3d 1",  # no match to reporter that started publishing in 2010
    ]
    illegitimate_cites.extend(
        "1 %s 1" % reporter for reporter in blocked_by_date)

    body_text = ", some text, ".join(legitimate_cites + illegitimate_cites)
    case = case_factory(
        body_cache__text=body_text,
        decision_date=datetime(2000, 1, 1),
    )
    fabfile.extract_all_citations()
    update_elasticsearch_from_queue()

    # check extracted cites
    cites = list(ExtractedCitation.objects.all())
    cite_set = {c.cite for c in cites}
    normalized_cite_set = {c.normalized_cite for c in cites}
    assert cite_set == set(legitimate_cites)
    assert normalized_cite_set == legitimate_cites_normalized
    # Every extracted citation must be attributed to the case created above.
    for extracted in cites:
        assert extracted.cited_by_id == case.pk