Ejemplo n.º 1
0
def get_citations_from_tree(complete_html_tree, case_path):
    """Collect the citations for a case from its parsed HTML tree.

    Sources are tried in order; later ones are only consulted while nothing
    has been found yet:

    1. The text of every ``<center>`` element that has a descendant text node
       not starting with "No.", "Case No." or "Record No.".
    2. The first ``<title>`` text node, if the document has one.
    3. A stored entry in ``fixes`` for ``case_path``.
    4. When "input_citations" is in ``DEBUG``: the case is opened in Firefox
       and the operator is asked to type the citation, which is parsed and
       recorded through ``add_fix``.

    Args:
        complete_html_tree: Parsed HTML tree exposing ``xpath()``.
        case_path: Path of the case file; used as the key into ``fixes`` and
            to build the ``file://`` URL shown to the operator.

    Returns:
        The citations found; empty when no source yields any.
    """
    path = '//center[descendant::text()[not(starts-with(normalize-space(.), "No.") or starts-with(normalize-space(.), "Case No.") or starts-with(normalize-space(.), "Record No."))]]'
    citations = []
    for e in complete_html_tree.xpath(path):
        text = tostring(e, method="text", encoding="unicode")
        citations.extend(get_citations(text, html=False, do_defendant=False))

    if not citations:
        # Fall back to the page title. A document without any <title> text
        # gives an empty xpath result; indexing it blindly would raise
        # IndexError and skip the remaining fallbacks below.
        titles = complete_html_tree.xpath("//title/text()")
        if titles:
            citations = get_citations(titles[0], html=False, do_post_citation=False, do_defendant=False)

    if not citations:
        try:
            citations = fixes[case_path]["citations"]
        except KeyError:
            # No stored fix for this case: optionally ask a human.
            if "input_citations" in DEBUG:
                # communicate() blocks until the browser process exits.
                subprocess.Popen(["firefox", "file://%s" % case_path], shell=False).communicate()
                input_citation = raw_input("  No citations found. What should be here? ")
                citation_objects = get_citations(input_citation, html=False, do_post_citation=False, do_defendant=False)
                add_fix(case_path, {"citations": citation_objects})
                citations = citation_objects

    if "citations" in DEBUG:
        if citations:
            cite_strs = [str(cite.__dict__) for cite in citations]
            log_print("  Citations found: %s" % ",\n                   ".join(cite_strs))
        else:
            log_print("  No citations found!")
    return citations
Ejemplo n.º 2
0
def get_citations_from_tree(complete_html_tree, case_path):
    """Collect the citations for a case from its parsed HTML tree.

    Lookup order (each step only runs while no citation has been found):

    1. Text of ``<center>`` elements having a descendant text node that does
       not start with "No.", "Case No." or "Record No.".
    2. The first ``<title>`` text node, when one exists.
    3. The ``fixes`` entry stored for ``case_path``.
    4. If "input_citations" is in ``DEBUG``, a citation typed by the operator
       after the case is opened in Firefox; the parsed result is saved with
       ``add_fix``.

    Args:
        complete_html_tree: Parsed HTML tree exposing ``xpath()``.
        case_path: Path of the case file; key into ``fixes`` and target of
            the ``file://`` URL.

    Returns:
        The citations found; empty when no source yields any.
    """
    path = '//center[descendant::text()[not(starts-with(normalize-space(.), "No.") or starts-with(normalize-space(.), "Case No.") or starts-with(normalize-space(.), "Record No."))]]'
    citations = []
    for e in complete_html_tree.xpath(path):
        text = tostring(e, method='text', encoding='unicode')
        citations.extend(get_citations(text, html=False, do_defendant=False))

    if not citations:
        # The title is optional: an empty xpath result must not raise
        # IndexError, otherwise the fallbacks below are never reached.
        titles = complete_html_tree.xpath('//title/text()')
        if titles:
            citations = get_citations(titles[0], html=False, do_post_citation=False, do_defendant=False)

    if not citations:
        try:
            citations = fixes[case_path]['citations']
        except KeyError:
            # Nothing recorded for this case yet; optionally ask a human.
            if 'input_citations' in DEBUG:
                # communicate() blocks until the browser process exits.
                subprocess.Popen(['firefox', 'file://%s' % case_path], shell=False).communicate()
                input_citation = raw_input('  No citations found. What should be here? ')
                citation_objects = get_citations(input_citation, html=False, do_post_citation=False, do_defendant=False)
                add_fix(case_path, {'citations': citation_objects})
                citations = citation_objects

    if 'citations' in DEBUG:
        if citations:
            cite_strs = [str(cite.__dict__) for cite in citations]
            log_print("  Citations found: %s" % ',\n                   '.join(cite_strs))
        else:
            log_print("  No citations found!")
    return citations
Ejemplo n.º 3
0
def get_document_citations(document):
    """Return the citations found in a document's best available text.

    HTML sources win over plain text: ``html_lawbox`` first, then ``html``.
    Plain text is parsed with ``html=False``. A document with none of the
    three yields an empty list.
    """
    markup = document.html_lawbox or document.html
    if markup:
        return find_citations.get_citations(markup)
    if document.plain_text:
        return find_citations.get_citations(document.plain_text, html=False)
    return []
Ejemplo n.º 4
0
def get_document_citations(document):
    """Extract citations from whichever text form the document provides.

    Preference order: ``html_lawbox``, ``html``, then ``plain_text`` (parsed
    with ``html=False``). Returns an empty list if all three are empty.
    """
    if document.html_lawbox:
        return find_citations.get_citations(document.html_lawbox)
    if document.html:
        return find_citations.get_citations(document.html)
    if document.plain_text:
        return find_citations.get_citations(document.plain_text, html=False)
    return []
Ejemplo n.º 5
0
 def test_disambiguate_citations(self):
     """Are variant or ambiguous reporter abbreviations resolved as expected?

     Each case pairs a plain-text string with the full list of citations
     that get_citations(..., html=False) should return for it.
     """
     Citation = find_citations.Citation
     cases = [
         # 1. P.R.R --> Correct abbreviation for a reporter.
         ('1 P.R.R. 1', [
             Citation(volume=1, reporter='P.R.R.', page=1,
                      canonical_reporter='P.R.R.', lookup_index=0),
         ]),
         # 2. U. S. --> A simple variant to resolve.
         ('1 U. S. 1', [
             Citation(volume=1, reporter='U.S.', page=1,
                      canonical_reporter='U.S.', lookup_index=0,
                      court='scotus'),
         ]),
         # 3. A.2d --> Not a variant, but needs to be looked up in the
         #    EDITIONS variable.
         ('1 A.2d 1', [
             Citation(volume=1, reporter='A.2d', page=1,
                      canonical_reporter='A.', lookup_index=0),
         ]),
         # 4. A. 2d --> An unambiguous variant of an edition.
         ('1 A. 2d 1', [
             Citation(volume=1, reporter='A.2d', page=1,
                      canonical_reporter='A.', lookup_index=0),
         ]),
         # 5. P.R. --> A variant of 'Pen. & W.', 'P.R.R.', or 'P.' that is
         #    resolvable by year. Of the three, only Pen. & W. was being
         #    published in 1831.
         ('1 P.R. 1 (1831)', [
             Citation(volume=1, reporter='Pen. & W.', page=1,
                      canonical_reporter='Pen. & W.', lookup_index=0,
                      year=1831),
         ]),
         # 5.1: W.2d --> A variant of an edition that resolves to either
         #      'Wis. 2d' or 'Wash. 2d' and is resolvable by year. Of the
         #      two, only Wis. 2d was being published in 1854.
         ('1 W.2d 1 (1854)', [
             Citation(volume=1, reporter='Wis. 2d', page=1,
                      canonical_reporter='Wis.', lookup_index=0,
                      year=1854),
         ]),
         # 5.2: Wash. --> A non-variant that has more than one reporter for
         #      the key, but is resolvable by year.
         ('1 Wash. 1 (1890)', [
             Citation(volume=1, reporter='Wash.', page=1,
                      canonical_reporter='Wash.', lookup_index=1,
                      year=1890),
         ]),
         # 6. Cr. --> A variant of Cranch, which is ambiguous, except when
         #    paired with this variation.
         ('1 Cr. 1', [
             Citation(volume=1, reporter='Cranch', page=1,
                      canonical_reporter='Cranch', lookup_index=0,
                      court='scotus'),
         ]),
         # 7. Cranch. --> Not a variant, but could refer to either Cranch's
         #    Supreme Court cases or his DC ones. Here we cannot
         #    disambiguate: years are not known and there are no further
         #    clues, so Cranch is simply dropped from the results.
         ('1 Cranch 1 1 U.S. 23', [
             Citation(volume=1, reporter='U.S.', page=23,
                      canonical_reporter='U.S.', lookup_index=0,
                      court='scotus'),
         ]),
         # 8. Unsolved problem. In theory, parallel citations could resolve
         #    this, because Rob. is cited next to La., but the proximity of
         #    citations to each other is not currently known.
         #  - Rob. --> Either:
         #      8.1: A variant of Robards (1862-1865) or
         #      8.2: Robinson's Louisiana Reports (1841-1846) or
         #      8.3: Robinson's Virginia Reports (1842-1865)
         #('1 Rob. 1 1 La. 1',
         # [find_citations.Citation(volume=1, reporter='Rob.', page=1, canonical_reporter='Rob.', lookup_index=0),
         #  find_citations.Citation(volume=1, reporter='La.', page=1, canonical_reporter='La.', lookup_index=0)]),
     ]
     for text, expected in cases:
         found = get_citations(text, html=False)
         found_dicts = [cite.__dict__ for cite in found]
         expected_dicts = [cite.__dict__ for cite in expected]
         self.assertEqual(
             found,
             expected,
             msg='%s\n%s != \n%s' % (text, found_dicts, expected_dicts),
         )
Ejemplo n.º 6
0
 def test_find_citations(self):
     """Does get_citations build the expected Citation from each string?

     Only the first citation returned for each sample is compared.
     """
     Citation = find_citations.Citation
     cases = (
         # Basic test
         ('1 U.S. 1',
          Citation(volume=1, reporter='U.S.', page=1,
                   canonical_reporter='U.S.', lookup_index=0,
                   court='scotus')),
         # Basic test of non-case name before citation (should not be found)
         ('lissner test 1 U.S. 1',
          Citation(volume=1, reporter='U.S.', page=1,
                   canonical_reporter='U.S.', lookup_index=0,
                   court='scotus')),
         # Test with plaintiff and defendant
         ('lissner v. test 1 U.S. 1',
          Citation(plaintiff='lissner', defendant='test', volume=1,
                   reporter='U.S.', page=1, canonical_reporter='U.S.',
                   lookup_index=0, court='scotus')),
         # Test with plaintiff, defendant and year
         ('lissner v. test 1 U.S. 1 (1982)',
          Citation(plaintiff='lissner', defendant='test', volume=1,
                   reporter='U.S.', page=1, year=1982,
                   canonical_reporter='U.S.', lookup_index=0,
                   court='scotus')),
         # Test with different reporter than all of above.
         ('bob lissner v. test 1 F.2d 1 (1982)',
          Citation(plaintiff='lissner', defendant='test', volume=1,
                   reporter='F.2d', page=1, year=1982,
                   canonical_reporter='F.', lookup_index=0)),
         # Test with court and extra information
         ('bob lissner v. test 1 U.S. 12, 347-348 (4th Cir. 1982)',
          Citation(plaintiff='lissner', defendant='test', volume=1,
                   reporter='U.S.', page=12, year=1982, extra=u'347-348',
                   court='ca4', canonical_reporter='U.S.', lookup_index=0)),
         # Test with text before and after and a variant reporter
         ('asfd 22 U. S. 332 (1975) asdf',
          Citation(volume=22, reporter='U.S.', page=332, year=1975,
                   canonical_reporter='U.S.', lookup_index=0,
                   court='scotus')),
         # Test with finding reporter when it's a second edition
         ('asdf 22 A.2d 332 asdf',
          Citation(volume=22, reporter='A.2d', page=332,
                   canonical_reporter='A.', lookup_index=0)),
         # Test finding a variant second edition reporter
         ('asdf 22 A. 2d 332 asdf',
          Citation(volume=22, reporter='A.2d', page=332,
                   canonical_reporter='A.', lookup_index=0)),
         # Test finding a variant of an edition resolvable by variant alone.
         ('171 Wn.2d 1016',
          Citation(volume=171, reporter='Wash. 2d', page=1016,
                   canonical_reporter='Wash.', lookup_index=1)),
     )
     for text, expected in cases:
         first_found = get_citations(text)[0]
         failure_msg = '%s\n%s != \n%s' % (text, first_found.__dict__, expected.__dict__)
         self.assertEqual(first_found, expected, msg=failure_msg)
Ejemplo n.º 7
0
def citation_redirector(request, reporter, volume, page):
    """Take a citation URL and redirect the user to the canonical page for
    that citation.

    This uses the same infrastructure as the thing that identifies citations
    in the text of opinions.

    Args:
        request: The incoming request, used to build the template context.
        reporter: Reporter portion of the citation, as a string.
        volume: Volume portion of the citation, as a string.
        page: Page portion of the citation, as a string.

    Returns:
        * A response with status 404 rendering the info page when no document
          matches the citation.
        * A permanent redirect to the document when exactly one matches.
        * A response with status 300 rendering the info page with the
          matching documents when more than one matches.
    """
    citation_str = " ".join([volume, reporter, page])
    try:
        citation = get_citations(citation_str)[0]
        citation_str = citation.base_citation()  # Corrects typos/variations.
        # Materialise the keys before indexing: dict views cannot be indexed
        # on Python 3, and an empty mapping still raises IndexError so the
        # fallback below keeps working.
        lookup_fields = [list(map_citations_to_models([citation]).keys())[0]]
    except IndexError:
        # Unable to disambiguate the citation. Try looking in *all* citation
        # fields.
        lookup_fields = [
            "neutral_cite",
            "federal_cite_one",
            "federal_cite_two",
            "federal_cite_three",
            "specialty_cite_one",
            "state_cite_regional",
            "state_cite_one",
            "state_cite_two",
            "state_cite_three",
            "westlaw_cite",
            "lexis_cite",
        ]

    # We were able to get a match, expand it if it's a federal/state match.
    if lookup_fields == ["federal_cite_one"]:
        lookup_fields = ["federal_cite_one", "federal_cite_two", "federal_cite_three"]
    elif lookup_fields == ["state_cite_one"]:
        lookup_fields = ["state_cite_one", "state_cite_two", "state_cite_three"]

    # OR together one condition per candidate citation field.
    q = Q()
    for lookup_field in lookup_fields:
        q |= Q(**{"citation__" + lookup_field: citation_str})
    documents = Document.objects.filter(q)

    # Count once. Repeating count() per branch costs extra queries and, if
    # the result changes in between, could let every branch be skipped so
    # the view returns None.
    document_count = documents.count()

    # Show the correct page....
    if document_count == 0:
        # No results for an otherwise valid citation. The status is set on
        # the response object after rendering.
        response = render_to_response(
            "casepage/citation_redirect_info_page.html",
            {"none_found": True, "citation_str": citation_str, "private": True},
            RequestContext(request),
        )
        response.status_code = 404
        return response

    if document_count == 1:
        # Total success. Redirect to correct location.
        return HttpResponsePermanentRedirect(documents[0].get_absolute_url())

    # Multiple results. Show them.
    response = render_to_response(
        "casepage/citation_redirect_info_page.html",
        {"too_many": True, "citation_str": citation_str, "documents": documents, "private": True},
        RequestContext(request),
    )
    response.status_code = 300
    return response
Ejemplo n.º 8
0
 def test_find_citations(self):
     """Is the first citation found in each sample string the expected one?"""
     make = find_citations.Citation
     samples = (
         # Basic test
         ('1 U.S. 1',
          make(volume=1, reporter='U.S.', page=1, canonical_reporter='U.S.',
               lookup_index=0, court='scotus')),
         # Basic test of non-case name before citation (should not be found)
         ('lissner test 1 U.S. 1',
          make(volume=1, reporter='U.S.', page=1, canonical_reporter='U.S.',
               lookup_index=0, court='scotus')),
         # Test with plaintiff and defendant
         ('lissner v. test 1 U.S. 1',
          make(plaintiff='lissner', defendant='test', volume=1,
               reporter='U.S.', page=1, canonical_reporter='U.S.',
               lookup_index=0, court='scotus')),
         # Test with plaintiff, defendant and year
         ('lissner v. test 1 U.S. 1 (1982)',
          make(plaintiff='lissner', defendant='test', volume=1,
               reporter='U.S.', page=1, year=1982,
               canonical_reporter='U.S.', lookup_index=0, court='scotus')),
         # Test with different reporter than all of above.
         ('bob lissner v. test 1 F.2d 1 (1982)',
          make(plaintiff='lissner', defendant='test', volume=1,
               reporter='F.2d', page=1, year=1982, canonical_reporter='F.',
               lookup_index=0)),
         # Test with court and extra information
         ('bob lissner v. test 1 U.S. 12, 347-348 (4th Cir. 1982)',
          make(plaintiff='lissner', defendant='test', volume=1,
               reporter='U.S.', page=12, year=1982, extra=u'347-348',
               court='ca4', canonical_reporter='U.S.', lookup_index=0)),
         # Test with text before and after and a variant reporter
         ('asfd 22 U. S. 332 (1975) asdf',
          make(volume=22, reporter='U.S.', page=332, year=1975,
               canonical_reporter='U.S.', lookup_index=0, court='scotus')),
         # Test with finding reporter when it's a second edition
         ('asdf 22 A.2d 332 asdf',
          make(volume=22, reporter='A.2d', page=332, canonical_reporter='A.',
               lookup_index=0)),
         # Test finding a variant second edition reporter
         ('asdf 22 A. 2d 332 asdf',
          make(volume=22, reporter='A.2d', page=332, canonical_reporter='A.',
               lookup_index=0)),
         # Test finding a variant of an edition resolvable by variant alone.
         ('171 Wn.2d 1016',
          make(volume=171, reporter='Wash. 2d', page=1016,
               canonical_reporter='Wash.', lookup_index=1)),
     )
     for sample, wanted in samples:
         got = get_citations(sample)[0]
         details = (sample, got.__dict__, wanted.__dict__)
         self.assertEqual(got, wanted, msg='%s\n%s != \n%s' % details)
Ejemplo n.º 9
0
 def test_disambiguate_citations(self):
     """Does get_citations resolve (or drop) ambiguous reporter strings?

     Every sample is parsed as plain text and the complete result list is
     compared against the expected citations.
     """
     make = find_citations.Citation
     samples = [
         # 1. P.R.R --> Correct abbreviation for a reporter.
         ('1 P.R.R. 1',
          [make(volume=1, reporter='P.R.R.', page=1,
                canonical_reporter='P.R.R.', lookup_index=0)]),
         # 2. U. S. --> A simple variant to resolve.
         ('1 U. S. 1',
          [make(volume=1, reporter='U.S.', page=1, canonical_reporter='U.S.',
                lookup_index=0, court='scotus')]),
         # 3. A.2d --> Not a variant, but needs to be looked up in the
         #    EDITIONS variable.
         ('1 A.2d 1',
          [make(volume=1, reporter='A.2d', page=1, canonical_reporter='A.',
                lookup_index=0)]),
         # 4. A. 2d --> An unambiguous variant of an edition.
         ('1 A. 2d 1',
          [make(volume=1, reporter='A.2d', page=1, canonical_reporter='A.',
                lookup_index=0)]),
         # 5. P.R. --> A variant of 'Pen. & W.', 'P.R.R.', or 'P.' that's
         #    resolvable by year: of the three, only Pen. & W. was being
         #    published in 1831.
         ('1 P.R. 1 (1831)',
          [make(volume=1, reporter='Pen. & W.', page=1,
                canonical_reporter='Pen. & W.', lookup_index=0, year=1831)]),
         # 5.1: W.2d --> A variant of an edition that either resolves to
         #      'Wis. 2d' or 'Wash. 2d' and is resolvable by year: of the
         #      two, only Wis. 2d was being published in 1854.
         ('1 W.2d 1 (1854)',
          [make(volume=1, reporter='Wis. 2d', page=1,
                canonical_reporter='Wis.', lookup_index=0, year=1854)]),
         # 5.2: Wash. --> A non-variant that has more than one reporter for
         #      the key, but is resolvable by year.
         ('1 Wash. 1 (1890)',
          [make(volume=1, reporter='Wash.', page=1,
                canonical_reporter='Wash.', lookup_index=1, year=1890)]),
         # 6. Cr. --> A variant of Cranch, which is ambiguous, except when
         #    paired with this variation.
         ('1 Cr. 1',
          [make(volume=1, reporter='Cranch', page=1,
                canonical_reporter='Cranch', lookup_index=0,
                court='scotus')]),
         # 7. Cranch. --> Not a variant, but could refer to either Cranch's
         #    Supreme Court cases or his DC ones. In this case, we cannot
         #    disambiguate. Years are not known, and we have no further
         #    clues. We must simply drop Cranch from the results.
         ('1 Cranch 1 1 U.S. 23',
          [make(volume=1, reporter='U.S.', page=23,
                canonical_reporter='U.S.', lookup_index=0, court='scotus')]),
         # 8. Unsolved problem. In theory, we could use parallel citations
         #    to resolve this, because Rob. is getting cited next to La.,
         #    but we don't currently know the proximity of citations to each
         #    other, so can't use this.
         #  - Rob. --> Either:
         #      8.1: A variant of Robards (1862-1865) or
         #      8.2: Robinson's Louisiana Reports (1841-1846) or
         #      8.3: Robinson's Virginia Reports (1842-1865)
         #('1 Rob. 1 1 La. 1',
         # [find_citations.Citation(volume=1, reporter='Rob.', page=1, canonical_reporter='Rob.', lookup_index=0),
         #  find_citations.Citation(volume=1, reporter='La.', page=1, canonical_reporter='La.', lookup_index=0)]),
     ]
     for sample, wanted in samples:
         got = get_citations(sample, html=False)
         details = (
             sample,
             [cite.__dict__ for cite in got],
             [cite.__dict__ for cite in wanted],
         )
         self.assertEqual(got, wanted, msg='%s\n%s != \n%s' % details)
Ejemplo n.º 10
0
def citation_redirector(request, reporter, volume, page):
    """Take a citation URL and use it to redirect the user to the canonical
    page for that citation.

    This uses the same infrastructure as the thing that identifies citations
    in the text of opinions.

    Args:
        request: The incoming request, used to build the template context.
        reporter: Reporter portion of the citation, as a string.
        volume: Volume portion of the citation, as a string.
        page: Page portion of the citation, as a string.

    Returns:
        A 404 info page when nothing matches, a permanent redirect when
        exactly one document matches, or a 300 info page listing the
        documents when several match.
    """
    citation_str = " ".join([volume, reporter, page])
    try:
        citation = get_citations(citation_str)[0]
        citation_str = citation.base_citation()  # Corrects typos/variations.
        # list(...) keeps the lookup indexable on Python 3 (dict views are
        # not) and still raises IndexError for an empty mapping, which the
        # handler below relies on.
        lookup_fields = [list(map_citations_to_models([citation]).keys())[0]]
    except IndexError:
        # Unable to disambiguate the citation. Try looking in *all* citation
        # fields.
        lookup_fields = [
            'neutral_cite', 'federal_cite_one', 'federal_cite_two',
            'federal_cite_three', 'specialty_cite_one', 'state_cite_regional',
            'state_cite_one', 'state_cite_two', 'state_cite_three',
            'westlaw_cite', 'lexis_cite'
        ]

    # We were able to get a match, expand it if it's a federal/state match.
    if lookup_fields == ['federal_cite_one']:
        lookup_fields = [
            'federal_cite_one', 'federal_cite_two', 'federal_cite_three'
        ]
    elif lookup_fields == ['state_cite_one']:
        lookup_fields = [
            'state_cite_one', 'state_cite_two', 'state_cite_three'
        ]

    # OR together one condition per candidate citation field.
    q = Q()
    for lookup_field in lookup_fields:
        q |= Q(**{'citation__' + lookup_field: citation_str})
    documents = Document.objects.filter(q)

    # Evaluate the count a single time: calling count() in each branch
    # repeats the query, and a change between calls could skip every branch
    # and make the view return None.
    document_count = documents.count()

    # Show the correct page....
    if document_count == 0:
        # No results for an otherwise valid citation. The status is set on
        # the response object after rendering.
        response = render_to_response(
            'casepage/citation_redirect_info_page.html',
            {
                'none_found': True,
                'citation_str': citation_str,
                'private': True,
            },
            RequestContext(request),
        )
        response.status_code = 404
        return response

    if document_count == 1:
        # Total success. Redirect to correct location.
        return HttpResponsePermanentRedirect(documents[0].get_absolute_url())

    # Multiple results. Show them.
    response = render_to_response(
        'casepage/citation_redirect_info_page.html',
        {
            'too_many': True,
            'citation_str': citation_str,
            'documents': documents,
            'private': True,
        },
        RequestContext(request),
    )
    response.status_code = 300
    return response