예제 #1
0
def test_extract_citations(case_factory, tmpdir, settings, elasticsearch):
    """Check that citation extraction keeps valid cites and rejects invalid ones.

    Creates a single case whose body text mixes citations that should be
    extracted with strings that should not, runs
    ``fabfile.extract_all_citations()``, and compares the resulting
    ``ExtractedCitation`` rows against the expected raw and normalized cites.
    """
    from scripts.extract_cites import EDITIONS as processed_editions
    settings.MISSED_CITATIONS_DIR = str(tmpdir)

    # Reporters whose every edition starts after 2000 are expected not to
    # match for the case below (decided 2000-01-01), so cites to them are
    # moved from the "legitimate" list to the "illegitimate" list.
    blocked_by_date = {
        k for k in list(EDITIONS.keys()) + list(VARIATIONS_ONLY.keys())
        if all(c['start_year'] > 2000 for c in processed_editions[k])
    }

    # Each entry is either a cite string expected to be extracted as-is, or a
    # [text_in_case, expected_form] pair when normalization should change it.
    expected_entries = [
        "225 F. Supp. 552",  # correct
        ["125 f supp 152", "125 F. Supp. 152"],  # normalized
        ["125 Burnett (Wis.) 152", "125 Bur. 152"],  # normalized
        ["1 F. 2d 2", "1 F.2d 2"],  # not matched as "1 F. 2"
        "2 1/2 Mass. 1",  # special volume numbers
        "3 Suppl. Mass. 2",  # special volume numbers
        "1 La.App. 5 Cir. 2",  # not matched as "1 La.App. 5"
        "2000 WL 12345",  # vendor cite
    ]
    expected_entries += [
        "1 %s 1" % c for c in EDITIONS.keys() if c not in blocked_by_date
    ]
    expected_entries += [
        ["1 %s 1" % k, "1 %s 1" % v]
        for k, vv in VARIATIONS_ONLY.items() for v in vv
        if k not in blocked_by_date
    ]

    # Split the mixed entries once into (text_in_case, expected_form) pairs.
    cite_pairs = [
        (entry, entry) if isinstance(entry, str) else (entry[0], entry[1])
        for entry in expected_entries
    ]
    legitimate_cites_normalized = {
        normalize_cite(expected) for _, expected in cite_pairs
    }
    legitimate_cites = [raw for raw, _ in cite_pairs]

    illegitimate_cites = [
        "2 Dogs 3",  # unrecognized reporter
        "3 Dogs 4",  # duplicate unrecognized reporter
        "1 or 2",  # not matched as 1 Or. 2
        "word1 Mass. 2word",  # not matched if part of larger word
        "1 Mass.\n 2",  # no match across newlines
        "1 A.3d 1",  # no match to reporter that started publishing in 2010
    ]
    illegitimate_cites += ["1 %s 1" % c for c in blocked_by_date]

    case = case_factory(
        body_cache__text=", some text, ".join(legitimate_cites +
                                              illegitimate_cites),
        decision_date=datetime(2000, 1, 1))
    fabfile.extract_all_citations()
    update_elasticsearch_from_queue()

    # check extracted cites
    cites = list(ExtractedCitation.objects.all())
    cite_set = {c.cite for c in cites}
    normalized_cite_set = {c.normalized_cite for c in cites}
    assert cite_set == set(legitimate_cites)
    assert normalized_cite_set == legitimate_cites_normalized
    assert all(c.cited_by_id == case.pk for c in cites)
예제 #2
0
def test_case_cited_by(client, case_factory, tmpdir, settings, elasticsearch):
    """The cited-by page of a case names the citing cases and no others."""
    settings.MISSED_CITATIONS_DIR = str(tmpdir)

    # One cited case, two cases whose body text is its citation, and one
    # control case created without that text.
    dest_case = case_factory()
    dest_cite = dest_case.citations.first()
    source_cases = []
    for _ in range(2):
        source_cases.append(case_factory(body_cache__text=dest_cite.cite))
    non_citing_case = case_factory()

    fabfile.extract_all_citations()
    update_elasticsearch_from_queue()

    cited_by_url = reverse('case_cited_by', args=[dest_case.pk], host='cite')
    response = client.get(cited_by_url)
    citing_names = [source.name_abbreviation for source in source_cases]
    other_names = [non_citing_case.name_abbreviation]
    check_response(
        response,
        content_includes=citing_names,
        content_excludes=other_names,
    )
예제 #3
0
def test_filter_case_cite_by(client, extracted_citation_factory, case_factory,
                             elasticsearch):
    """The ``cites_to`` filter on the case list accepts a cite or a case id."""
    cite = '1 Mass. 1'
    search_url = api_reverse("cases-list")
    all_cases = [case_factory() for _ in range(4)]
    case_cited = case_factory(citations__cite=cite)

    # Every case but the last gets an extracted citation pointing at `cite`.
    citing_cases = all_cases[:-1]
    for citing_case in citing_cases:
        extracted_citation_factory(cited_by=citing_case, cite=cite)
    update_elasticsearch_from_queue()

    expected_ids = {citing_case.id for citing_case in citing_cases}

    def matching_ids(cites_to):
        # ids returned by the search endpoint for the given cites_to value
        payload = client.get(search_url, {"cites_to": cites_to}).json()
        return {hit['id'] for hit in payload['results']}

    # get cases by cites_to=citation
    assert matching_ids(cite) == expected_ids

    # get cases by cites_to=id
    assert matching_ids(case_cited.id) == expected_ids
예제 #4
0
def test_extract_citations(case_factory, elasticsearch):
    """Check which citations are extracted when a case's body cache is synced.

    Sets the text of one case to a mix of citations that should be extracted
    and strings that should not, calls ``case.sync_case_body_cache()``, and
    compares the resulting ``ExtractedCitation`` rows against the expected
    raw and normalized cites.
    """
    # Each entry is either a cite string expected to be extracted as-is, or a
    # [text_in_case, expected_form] pair when normalization should change it.
    expected_entries = [
        "225 F. Supp. 552",  # correct
        ["1 F Supp 1", "1 F. Supp. 1"],  # normalized
        ["2 F.-'Supp.- 2",
         "2 F. Supp. 2"],  # extra cruft matched by get_cite_extractor
        ["125 Yt. 152",
         "125 Vt. 152"],  # custom reporters added by patch_reporters_db
        ["125 Burnett (Wis.) 152", "125 Bur. 152"],  # normalized
        ["1 F. 2d 2", "1 F.2d 2"],  # not matched as "1 F. 2"
        "2000 WL 12345",  # vendor cite
        ["3 F Supp, at 3", "3 F. Supp. 3"],  # short cite
    ]

    # Split the mixed entries once into (text_in_case, expected_form) pairs.
    cite_pairs = [
        (entry, entry) if isinstance(entry, str) else (entry[0], entry[1])
        for entry in expected_entries
    ]
    legitimate_cites_normalized = {
        normalize_cite(expected) for _, expected in cite_pairs
    }
    legitimate_cites = [raw for raw, _ in cite_pairs]

    illegitimate_cites = [
        "2 Dogs 3",  # unrecognized reporter
        "3 Dogs 4",  # duplicate unrecognized reporter
        "1 or 2",  # not matched as 1 Or. 2
        # limit on how much cruft matched by get_cite_extractor -- only 2 at end
        "125 f. supp.-' 152",
    ]

    case = case_factory(decision_date=datetime(2000, 1, 1))
    case_text = ", some text, ".join(legitimate_cites + illegitimate_cites)
    set_case_text(case, case_text)
    case.sync_case_body_cache()
    update_elasticsearch_from_queue()

    # check extracted cites
    cites = list(ExtractedCitation.objects.all())
    cite_set = {c.cite for c in cites}
    normalized_cite_set = {c.rdb_normalized_cite for c in cites}
    assert cite_set == set(legitimate_cites)
    assert normalized_cite_set == legitimate_cites_normalized
    assert all(c.cited_by_id == case.pk for c in cites)
예제 #5
0
        check_timestamps_unchanged(case, timestamp)

        # deleting
        obj.delete()
        timestamp = check_timestamps_changed(case, timestamp)

    # updating outbound references
    for obj, change_field, no_change_field in (
        (case.reporter, 'full_name', 'notes'),
        (case.court, 'name', 'none'),
        (case.volume, 'volume_number', 'publisher'),
        (case.jurisdiction, 'name', 'none'),
    ):
        # updating tracked field
        setattr(obj, change_field, 'foo')
        obj.save()
        timestamp = check_timestamps_changed(case, timestamp)

        # updated untracked field
        setattr(obj, no_change_field, 'foo')
        obj.save()
        check_timestamps_unchanged(case, timestamp)

    # case gets removed when in_scope changes
    update_elasticsearch_from_queue()
    CaseDocument.get(case.pk)
    case.duplicative = True
    case.save()
    update_elasticsearch_from_queue()
    CaseDocument.get(case.pk)