Esempio n. 1
0
 def test_reporter_tokenizer(self):
     """Verify that tokenize() splits sample citation text as expected."""
     # Each case pairs an input string with the exact token list expected
     # back; the expectations keep multi-word reporter abbreviations such
     # as 'U. S.' and 'Vet. App.' together as a single token.
     cases = (
         ('See Roe v. Wade, 410 U. S. 113 (1973)',
          ['See', 'Roe', 'v.', 'Wade,', '410', 'U. S.', '113', '(1973)']),
         ('Foo bar eats grue, 232 Vet. App. (2003)',
          ['Foo', 'bar', 'eats', 'grue,', '232', 'Vet. App.', '(2003)']),
     )
     for text, expected_tokens in cases:
         self.assertEqual(tokenize(text), expected_tokens)
Esempio n. 2
0
 def test_reporter_tokenizer(self):
     """Check the token lists produced for two sample citation strings."""
     # Citation whose reporter abbreviation ('U. S.') contains a space; the
     # expected output keeps it as one token.
     roe_tokens = tokenize('See Roe v. Wade, 410 U. S. 113 (1973)')
     self.assertEqual(
         roe_tokens,
         ['See', 'Roe', 'v.', 'Wade,', '410', 'U. S.', '113', '(1973)'],
     )

     # Citation with no page number after the reporter ('Vet. App.').
     grue_tokens = tokenize('Foo bar eats grue, 232 Vet. App. (2003)')
     self.assertEqual(
         grue_tokens,
         ['Foo', 'bar', 'eats', 'grue,', '232', 'Vet. App.', '(2003)'],
     )
Esempio n. 3
0
def get_citations(text, html=True, do_post_citation=True, do_defendant=True):
    """Find the citations contained in ``text``.

    The text is tokenized with ``reporter_tokenizer.tokenize`` and every
    interior token that is a key of ``EDITIONS`` or ``VARIATIONS_ONLY`` is
    treated as a candidate reporter and handed to ``extract_base_citation``.

    :param text: The text to scan.
    :param html: When true, ``text`` is first passed through
        ``get_visible_text``.
    :param do_post_citation: When true, ``add_post_citation`` is called on
        each extracted citation.
    :param do_defendant: When true, ``add_defendant`` is called on each
        extracted citation.
    :return: The citations returned by ``disambiguate_reporters``. Any of
        them that has no ``court`` set and for which ``is_scotus_reporter``
        is true gets ``court`` set to ``"scotus"``.
    """
    if html:
        text = get_visible_text(text)
    words = reporter_tokenizer.tokenize(text)
    citations = []
    # Exclude first and last tokens when looking for reporters, because valid
    # citations must have a volume before and a page number after the reporter.
    for i in range(1, len(words) - 1):
        token = words[i]
        # Test membership against the mappings directly. Concatenating the
        # two key lists for every token rebuilt a list and scanned it
        # linearly, and that concatenation is not supported by the key
        # views Python 3 returns.
        if token not in EDITIONS and token not in VARIATIONS_ONLY:
            continue

        citation = extract_base_citation(words, i)
        if citation is None:
            # Not a valid citation; continue looking
            continue
        if do_post_citation:
            add_post_citation(citation, words, i)
        if do_defendant:
            add_defendant(citation, words, i)
        citations.append(citation)

    # Disambiguate or drop all the reporters
    citations = disambiguate_reporters(citations)

    # Fill in the court only when nothing earlier in the pipeline set one.
    for citation in citations:
        if not citation.court and is_scotus_reporter(citation):
            citation.court = "scotus"

    return citations
Esempio n. 4
0
def get_citations(text, html=True, do_post_citation=True, do_defendant=True):
    """Extract citations from ``text``.

    Tokens come from ``reporter_tokenizer.tokenize``. Each token other than
    the first and last that appears as a key in ``EDITIONS`` or
    ``VARIATIONS_ONLY`` is passed, with its index, to
    ``extract_base_citation``; non-``None`` results are collected.

    :param text: The text to scan.
    :param html: If true, run ``get_visible_text`` on ``text`` before
        tokenizing.
    :param do_post_citation: If true, call ``add_post_citation`` for every
        collected citation.
    :param do_defendant: If true, call ``add_defendant`` for every collected
        citation.
    :return: The output of ``disambiguate_reporters`` for the collected
        citations, where each citation lacking a ``court`` and satisfying
        ``is_scotus_reporter`` has ``court`` set to ``'scotus'``.
    """
    if html:
        text = get_visible_text(text)
    words = reporter_tokenizer.tokenize(text)
    citations = []
    # Exclude first and last tokens when looking for reporters, because valid
    # citations must have a volume before and a page number after the reporter.
    for i in range(1, len(words) - 1):
        # Find reporter. Direct dict membership avoids building and scanning
        # a concatenated key list on every iteration, and also works where
        # ``keys()`` returns a view that cannot be added to another.
        is_reporter = words[i] in EDITIONS or words[i] in VARIATIONS_ONLY
        if not is_reporter:
            continue

        citation = extract_base_citation(words, i)
        if citation is None:
            # Not a valid citation; continue looking
            continue
        if do_post_citation:
            add_post_citation(citation, words, i)
        if do_defendant:
            add_defendant(citation, words, i)
        citations.append(citation)

    # Disambiguate or drop all the reporters
    citations = disambiguate_reporters(citations)

    # Only default the court when the citation does not already carry one.
    for citation in citations:
        if not citation.court and is_scotus_reporter(citation):
            citation.court = 'scotus'

    return citations