def test_hash(self):
    """Do two citation objects hash to the same?

    Temporarily installs ``Citation.fuzzy_hash`` as ``Citation.__hash__``
    and checks that two citations whose constructor arguments differ only
    in ``reporter_index`` produce equal hashes. The previous ``__hash__``
    is put back afterwards, even when the assertion fails, so the patch
    does not leak into other tests.
    """
    # Keep a handle on the current hash so it can be restored. Assigning
    # ``Citation.fuzzy_hash = Citation.__hash__`` at the end (as was done
    # before) goes in the wrong direction and leaves __hash__ patched.
    original_hash = Citation.__hash__
    Citation.__hash__ = Citation.fuzzy_hash
    try:
        # NOTE(review): reporter/volume look transposed here (other tests
        # in this file pass a string reporter and a numeric volume). Both
        # objects share the same values, so the comparison below should
        # not depend on it -- confirm before changing.
        citations = [
            Citation(reporter=2, volume="U.S.", page="2", reporter_index=1),
            Citation(reporter=2, volume="U.S.", page="2", reporter_index=2),
        ]
        self.assertEqual(hash(citations[0]), hash(citations[1]))
    finally:
        # Always undo the monkeypatch, pass or fail.
        Citation.__hash__ = original_hash
def test_find_tc_citations(self): """Can we parse tax court citations properly?""" test_pairs = ( # Test with atypical formatting for Tax Court Memos ('the 1 T.C. No. 233', [Citation(volume=1, reporter='T.C. No.', page=233, canonical_reporter=u'T.C. No.', lookup_index=0, reporter_index=2, reporter_found='T.C. No.')]), ('word T.C. Memo. 2019-233', [Citation(volume=2019, reporter='T.C. Memo.', page=233, canonical_reporter=u'T.C. Memo.', lookup_index=0, reporter_index=1, reporter_found='T.C. Memo.')]), ('something T.C. Summary Opinion 2019-233', [Citation(volume=2019, reporter='T.C. Summary Opinion', page=233, canonical_reporter=u'T.C. Summary Opinion', lookup_index=0, reporter_index=1, reporter_found='T.C. Summary Opinion')]), ('T.C. Summary Opinion 2018-133', [Citation(volume=2018, reporter='T.C. Summary Opinion', page=133, canonical_reporter=u'T.C. Summary Opinion', lookup_index=0, reporter_index=0, reporter_found='T.C. Summary Opinion')]), ('1 UNITED STATES TAX COURT REPORT (2018)', [Citation(volume=1, reporter='T.C.', page=2018, canonical_reporter=u'T.C.', lookup_index=0, reporter_index=1, reporter_found='UNITED STATES TAX COURT REPORT')]), ('U.S. of A. 1 UNITED STATES TAX COURT REPORT (2018)', [Citation(volume=1, reporter='T.C.', page=2018, canonical_reporter=u'T.C.', lookup_index=0, reporter_index=4, reporter_found='UNITED STATES TAX COURT REPORT')]), ('U.S. 1234 1 U.S. 1', [Citation(volume=1, reporter='U.S.', page=1, canonical_reporter=u'U.S.', lookup_index=0, reporter_index=3, court='scotus', reporter_found='U.S.')]), ) for q, a in test_pairs: print "Testing citation extraction for %s..." % q, cites_found = get_citations(q) self.assertEqual( cites_found, a, msg='%s\n%s\n\n !=\n\n%s' % ( q, ",\n".join([str(cite.__dict__) for cite in cites_found]), ",\n".join([str(cite.__dict__) for cite in a]), ) ) print "✓"
def test_find_citations(self): """Can we find and make Citation objects from strings?""" test_pairs = ( # Basic test ('1 U.S. 1', [ Citation(volume=1, reporter='U.S.', page=1, canonical_reporter=u'U.S.', lookup_index=0, court='scotus', reporter_index=1, reporter_found='U.S.') ]), # Basic test of non-case name before citation (should not be found) ('lissner test 1 U.S. 1', [ Citation(volume=1, reporter='U.S.', page=1, canonical_reporter=u'U.S.', lookup_index=0, court='scotus', reporter_index=3, reporter_found='U.S.') ]), # Test with plaintiff and defendant ('lissner v. test 1 U.S. 1', [ Citation(plaintiff='lissner', defendant='test', volume=1, reporter='U.S.', page=1, canonical_reporter=u'U.S.', lookup_index=0, court='scotus', reporter_index=4, reporter_found='U.S.') ]), # Test with plaintiff, defendant and year ('lissner v. test 1 U.S. 1 (1982)', [ Citation(plaintiff='lissner', defendant='test', volume=1, reporter='U.S.', page=1, year=1982, canonical_reporter=u'U.S.', lookup_index=0, court='scotus', reporter_index=4, reporter_found='U.S.') ]), # Test with different reporter than all of above. ('bob lissner v. test 1 F.2d 1 (1982)', [ Citation(plaintiff='lissner', defendant='test', volume=1, reporter='F.2d', page=1, year=1982, canonical_reporter=u'F.', lookup_index=0, reporter_index=5, reporter_found='F.2d') ]), # Test with court and extra information ('bob lissner v. test 1 U.S. 12, 347-348 (4th Cir. 1982)', [ Citation(plaintiff='lissner', defendant='test', volume=1, reporter='U.S.', page=12, year=1982, extra=u'347-348', court='ca4', canonical_reporter=u'U.S.', lookup_index=0, reporter_index=5, reporter_found='U.S.') ]), # Test with text before and after and a variant reporter ('asfd 22 U. S. 332 (1975) asdf', [ Citation(volume=22, reporter='U.S.', page=332, year=1975, canonical_reporter=u'U.S.', lookup_index=0, court='scotus', reporter_index=2, reporter_found='U. 
S.') ]), # Test with finding reporter when it's a second edition ('asdf 22 A.2d 332 asdf', [ Citation(volume=22, reporter='A.2d', page=332, canonical_reporter=u'A.', lookup_index=0, reporter_index=2, reporter_found='A.2d') ]), # Test finding a variant second edition reporter ('asdf 22 A. 2d 332 asdf', [ Citation(volume=22, reporter='A.2d', page=332, canonical_reporter=u'A.', lookup_index=0, reporter_index=2, reporter_found='A. 2d') ]), # Test finding a variant of an edition resolvable by variant alone. ('171 Wn.2d 1016', [ Citation(volume=171, reporter='Wash. 2d', page=1016, canonical_reporter=u'Wash.', lookup_index=1, reporter_index=1, reporter_found='Wn.2d') ]), # Test finding two citations where one of them has abutting # punctuation. ('2 U.S. 3, 4-5 (3 Atl. 33)', [ Citation(volume=2, reporter="U.S.", page=3, extra=u'4-5', canonical_reporter=u"U.S.", lookup_index=0, reporter_index=1, reporter_found="U.S.", court='scotus'), Citation(volume=3, reporter="A.", page=33, canonical_reporter=u"A.", lookup_index=0, reporter_index=5, reporter_found="Atl.") ]), # Test with the page number as a Roman numeral ('12 Neb. App. lxiv (2004)', [ Citation(volume=12, reporter='Neb. Ct. App.', page='lxiv', year=2004, canonical_reporter=u'Neb. Ct. App.', lookup_index=0, reporter_index=1, reporter_found='Neb. App.') ]), # Test with the 'digit-REPORTER-digit' corner-case formatting ('2007-NMCERT-008', [ Citation(volume=2007, reporter='NMCERT', page=8, canonical_reporter=u'NMCERT', lookup_index=0, reporter_index=1, reporter_found='NMCERT') ]), ('2006-Ohio-2095', [ Citation(volume=2006, reporter='Ohio', page=2095, canonical_reporter=u'Ohio', lookup_index=0, reporter_index=1, reporter_found='Ohio') ]), ) for q, a in test_pairs: print "Testing citation extraction for %s..." % q, cites_found = get_citations(q) self.assertEqual( cites_found, a, msg='%s\n%s\n\n !=\n\n%s' % ( q, ",\n".join([str(cite.__dict__) for cite in cites_found]), ",\n".join([str(cite.__dict__) for cite in a]), )) print "✓"
def test_disambiguate_citations(self): test_pairs = [ # 1. P.R.R --> Correct abbreviation for a reporter. ('1 P.R.R. 1', [ Citation(volume=1, reporter='P.R.R.', page=1, canonical_reporter=u'P.R.R.', lookup_index=0, reporter_index=1, reporter_found='P.R.R.') ]), # 2. U. S. --> A simple variant to resolve. ('1 U. S. 1', [ Citation(volume=1, reporter='U.S.', page=1, canonical_reporter=u'U.S.', lookup_index=0, court='scotus', reporter_index=1, reporter_found='U. S.') ]), # 3. A.2d --> Not a variant, but needs to be looked up in the # EDITIONS variable. ('1 A.2d 1', [ Citation(volume=1, reporter='A.2d', page=1, canonical_reporter=u'A.', lookup_index=0, reporter_index=1, reporter_found='A.2d') ]), # 4. A. 2d --> An unambiguous variant of an edition ('1 A. 2d 1', [ Citation(volume=1, reporter='A.2d', page=1, canonical_reporter=u'A.', lookup_index=0, reporter_index=1, reporter_found='A. 2d') ]), # 5. P.R. --> A variant of 'Pen. & W.', 'P.R.R.', or 'P.' that's # resolvable by year ( '1 P.R. 1 (1831)', # Of the three, only Pen & W. was being published this year. [ Citation(volume=1, reporter='Pen. & W.', page=1, canonical_reporter=u'Pen. & W.', lookup_index=0, year=1831, reporter_index=1, reporter_found='P.R.') ]), # 5.1: W.2d --> A variant of an edition that either resolves to # 'Wis. 2d' or 'Wash. 2d' and is resolvable by year. ( '1 W.2d 1 (1854)', # Of the two, only Wis. 2d was being published this year. [ Citation(volume=1, reporter='Wis. 2d', page=1, canonical_reporter=u'Wis.', lookup_index=0, year=1854, reporter_index=1, reporter_found='W.2d') ]), # 5.2: Wash. --> A non-variant that has more than one reporter for # the key, but is resolvable by year ('1 Wash. 1 (1890)', [ Citation(volume=1, reporter='Wash.', page=1, canonical_reporter=u'Wash.', lookup_index=1, year=1890, reporter_index=1, reporter_found='Wash.') ]), # 6. Cr. --> A variant of Cranch, which is ambiguous, except with # paired with this variation. ('1 Cra. 
1', [ Citation(volume=1, reporter='Cranch', page=1, canonical_reporter=u'Cranch', lookup_index=0, court='scotus', reporter_index=1, reporter_found='Cra.') ]), # 7. Cranch. --> Not a variant, but could refer to either Cranch's # Supreme Court cases or his DC ones. In this case, we cannot # disambiguate. Years are not known, and we have no further # clues. We must simply drop Cranch from the results. ('1 Cranch 1 1 U.S. 23', [ Citation(volume=1, reporter='U.S.', page=23, canonical_reporter=u'U.S.', lookup_index=0, court='scotus', reporter_index=4, reporter_found='U.S.') ]), # 8. Unsolved problem. In theory, we could use parallel citations # to resolve this, because Rob is getting cited next to La., but # we don't currently know the proximity of citations to each # other, so can't use this. # - Rob. --> Either: # 8.1: A variant of Robards (1862-1865) or # 8.2: Robinson's Louisiana Reports (1841-1846) or # 8.3: Robinson's Virgina Reports (1842-1865) # ('1 Rob. 1 1 La. 1', # [Citation(volume=1, reporter='Rob.', page=1, # canonical_reporter='Rob.', # lookup_index=0), # Citation(volume=1, reporter='La.', page=1, # canonical_reporter='La.', # lookup_index=0)]), ] for pair in test_pairs: print "Testing disambiguation for %s..." % pair[0], citations = get_citations(pair[0], html=False) self.assertEqual(citations, pair[1], msg='%s\n%s != \n%s' % (pair[0], [cite.__dict__ for cite in citations ], [cite.__dict__ for cite in pair[1]])) print "✓"