def test_enqueue_opinions(self):
    """Check the messages enqueue_opinions publishes for one day of nysd.

    get_opinions, get_document and channel.basic_publish on the downloader
    are replaced with mocks, so the test feeds the downloader dockets parsed
    from a saved opinions page and captures every publish call. Each
    captured message body is unpickled and checked for a readable docket
    file and a single-entry document map.
    """
    test_date = date(2011, 6, 17)

    opinions_path = TEST_OPINION_PATH + "nysd.2011_06_17.opinions.html"
    with open(opinions_path) as opinions_file:
        expected_html = opinions_file.read()
    expected_dockets = PP.parse_opinions(expected_html, 'nysd')

    # The PDF is binary data, so read it as bytes.
    pdf_path = TEST_PDF_PATH + 'gov.uscourts.nysd.351385.14.0.pdf'
    with open(pdf_path, 'rb') as pdf_file:
        expected_pdf_bits = pdf_file.read()

    # Set up the test environment
    # monkey patch out dependencies on pacer and rabbitmq
    self.downloader.get_opinions = Mock(return_value=expected_dockets)
    self.downloader.get_document = Mock(return_value=expected_pdf_bits)
    self.downloader.channel.basic_publish = mock_publish_call = Mock()

    self.downloader.enqueue_opinions('nysd', test_date, test_date)

    self.assertEqual(2, mock_publish_call.call_count)

    # Each recorded call is an (args, kwargs) pair; the message body is
    # passed to basic_publish as the 'body' keyword argument.
    messages = [pickle.loads(call[1]['body'])
                for call in mock_publish_call.call_args_list]
    filenames = [m['docket_filename'] for m in messages]
    document_maps = [m['docnums_to_filename'] for m in messages]

    for filename in filenames:
        with open(filename) as f:
            contents = f.read()
        # cleanup -- done once the file is closed again
        os.unlink(filename)
        self.assertTrue(contents is not None)

    for docmap in document_maps:
        self.assertEqual(1, len(docmap))
        # The single key has the form '<doc_num>-0'.
        self.assertTrue(re.match(r'\d+-0', list(docmap.keys())[0]))
def test_enqueue_opinions(self):
    """Check the messages enqueue_opinions publishes for one day of nysd.

    get_opinions, get_document and channel.basic_publish on the downloader
    are replaced with mocks, so the test feeds the downloader dockets parsed
    from a saved opinions page and captures every publish call. Each
    captured message body is unpickled and checked for a readable docket
    file and a single-entry document map.
    """
    test_date = date(2011, 6, 17)

    opinions_path = TEST_OPINION_PATH + "nysd.2011_06_17.opinions.html"
    with open(opinions_path) as opinions_file:
        expected_html = opinions_file.read()
    expected_dockets = PP.parse_opinions(expected_html, 'nysd')

    # The PDF is binary data, so read it as bytes.
    pdf_path = TEST_PDF_PATH + 'gov.uscourts.nysd.351385.14.0.pdf'
    with open(pdf_path, 'rb') as pdf_file:
        expected_pdf_bits = pdf_file.read()

    # Set up the test environment
    # monkey patch out dependencies on pacer and rabbitmq
    self.downloader.get_opinions = Mock(return_value=expected_dockets)
    self.downloader.get_document = Mock(return_value=expected_pdf_bits)
    self.downloader.channel.basic_publish = mock_publish_call = Mock()

    self.downloader.enqueue_opinions('nysd', test_date, test_date)

    self.assertEqual(2, mock_publish_call.call_count)

    # Each recorded call is an (args, kwargs) pair; the message body is
    # passed to basic_publish as the 'body' keyword argument.
    messages = [pickle.loads(call[1]['body'])
                for call in mock_publish_call.call_args_list]
    filenames = [m['docket_filename'] for m in messages]
    document_maps = [m['docnums_to_filename'] for m in messages]

    for filename in filenames:
        with open(filename) as f:
            contents = f.read()
        # cleanup -- done once the file is closed again
        os.unlink(filename)
        self.assertTrue(contents is not None)

    for docmap in document_maps:
        self.assertEqual(1, len(docmap))
        # The single key has the form '<doc_num>-0'.
        self.assertTrue(re.match(r'\d+-0', list(docmap.keys())[0]))
def test_download_opinions(self):
    """Compare freshly fetched nysd opinions with a saved opinions page.

    The opinions HTML for 2011-06-17 is requested through
    self.pacer_client, parsed, and checked against the dockets parsed from
    the saved fixture: same number of dockets, and every fetched docket's
    case number and case name appear among the expected ones.
    """
    #TODO: Change this test to only use html tests.
    # Pacer client tests shouldn't depend on parse pacer
    test_date = date(2011, 6, 17)

    opinions_path = TEST_OPINION_PATH + "nysd.2011_06_17.opinions.html"
    with open(opinions_path) as opinions_file:
        expected_html = opinions_file.read()
    expected_dockets = PP.parse_opinions(expected_html, 'nysd')

    opinions_html = self.pacer_client.get_opinions_html(
        court='nysd', start_date=test_date, end_date=test_date)
    dockets = PP.parse_opinions(opinions_html, 'nysd')

    self.assertEqual(len(expected_dockets), len(dockets))

    # Collect the expected values once rather than on every iteration.
    expected_casenums = [e.get_casenum() for e in expected_dockets]
    expected_case_names = [e.get_casemeta()['case_name']
                           for e in expected_dockets]

    for d in dockets:
        self.assertTrue(d.get_casenum() in expected_casenums)
        casemeta = d.get_casemeta()
        self.assertTrue(casemeta['case_name'] in expected_case_names)
def test_download_opinions(self):
    """Compare freshly fetched nysd opinions with a saved opinions page.

    The opinions HTML for 2011-06-17 is requested through
    self.pacer_client, parsed, and checked against the dockets parsed from
    the saved fixture: same number of dockets, and every fetched docket's
    case number and case name appear among the expected ones.
    """
    #TODO: Change this test to only use html tests.
    # Pacer client tests shouldn't depend on parse pacer
    test_date = date(2011, 6, 17)

    opinions_path = TEST_OPINION_PATH + "nysd.2011_06_17.opinions.html"
    with open(opinions_path) as opinions_file:
        expected_html = opinions_file.read()
    expected_dockets = PP.parse_opinions(expected_html, 'nysd')

    opinions_html = self.pacer_client.get_opinions_html(
        court='nysd', start_date=test_date, end_date=test_date)
    dockets = PP.parse_opinions(opinions_html, 'nysd')

    self.assertEqual(len(expected_dockets), len(dockets))

    # Collect the expected values once rather than on every iteration.
    expected_casenums = [e.get_casenum() for e in expected_dockets]
    expected_case_names = [e.get_casemeta()['case_name']
                           for e in expected_dockets]

    for d in dockets:
        self.assertTrue(d.get_casenum() in expected_casenums)
        casemeta = d.get_casemeta()
        self.assertTrue(casemeta['case_name'] in expected_case_names)
def test_parse_opinions(self):
    """Parse saved opinions pages and check the resulting dockets.

    Covers empty input, a valid page with no entries, an akd page (case
    metadata and document fields for several dockets, including one whose
    document carries a different case number), and an nysd page that uses
    a different linking format.
    """
    opinion_filelist = ["akd.1900", "akd.2010", "nysd.2009"]

    #test empty file
    with open('/dev/null') as empty_file:
        empty_bits = empty_file.read()
    dockets = PP.parse_opinions(empty_bits, 'test')
    self.assertEqual([], dockets)

    filebits = {}
    for opinion_file in opinion_filelist:
        path = TEST_OPINION_PATH + opinion_file + ".opinions.html"
        with open(path) as f:
            filebits[opinion_file] = f.read()

    #test valid opinion file with no entries
    dockets = PP.parse_opinions(filebits["akd.1900"], "akd")
    self.assertEqual([], dockets)

    dockets = PP.parse_opinions(filebits["akd.2010"], "akd")
    # number of entries in the opinions table
    self.assertEqual(78, len(dockets))

    #check basic metadata
    self.assertEqual("akd", dockets[0].get_court())
    self.assertEqual("12460", dockets[0].get_casenum())
    casemeta = dockets[0].get_casemeta()
    self.assertEqual("Steffensen v. City of Fairbanks et al",
                     casemeta['case_name'])
    self.assertEqual("4:09-cv-00004-RJB", casemeta['docket_num'])
    self.assertEqual("42:1983 Prisoner Civil Rights",
                     casemeta["case_cause"])
    self.assertEqual("Civil Rights: Other", casemeta["nature_of_suit"])

    self.assertEqual(1, len(dockets[0].documents))
    document = dockets[0].documents['98-0']
    self.assertEqual("98", document['doc_num'])
    self.assertEqual("0", document['attachment_num'])
    self.assertEqual("563", document['pacer_de_seq_num'])
    self.assertEqual("602530", document['pacer_dm_id'])
    self.assertEqual("2010-01-05", document['date_filed'])
    self.assertEqual("Order Dismissing Case", document['long_desc'])

    self.assertEqual("akd", dockets[1].get_court())
    self.assertEqual("18239", dockets[1].get_casenum())
    casemeta = dockets[1].get_casemeta()
    self.assertEqual("Kahle v. Executive Force Australia PTY LTD",
                     casemeta['case_name'])
    self.assertEqual("2:09-cv-00008-JWS", casemeta['docket_num'])
    self.assertEqual("28:1441 Petition for Removal- Personal Injury",
                     casemeta["case_cause"])
    self.assertEqual("Personal Inj. Prod. Liability",
                     casemeta["nature_of_suit"])

    self.assertEqual(1, len(dockets[1].documents))
    document = dockets[1].documents['27-0']
    self.assertEqual("27", document['doc_num'])
    self.assertEqual("0", document['attachment_num'])
    self.assertEqual("142", document['pacer_de_seq_num'])
    self.assertEqual("603861", document['pacer_dm_id'])
    self.assertEqual("2010-01-07", document['date_filed'])
    self.assertEqual(
        "Order on Motion for Hearing, Order on Motion to Amend/Correct, "
        "Order on Motion to Remand to State Court, Order on Motion to Strike",
        document['long_desc'])

    self.assertEqual("akd", dockets[5].get_court())
    self.assertEqual("15580", dockets[5].get_casenum())
    casemeta = dockets[5].get_casemeta()
    self.assertEqual("USA v. Celestine et al", casemeta['case_name'])
    self.assertEqual("3:2009-cr-00065-HRH", casemeta['docket_num'])
    # This docket has no cause / nature-of-suit entries in its metadata.
    self.assertEqual(None, casemeta.get("case_cause"))
    self.assertEqual(None, casemeta.get("nature_of_suit"))

    self.assertEqual(1, len(dockets[5].documents))
    document = dockets[5].documents['135-0']
    self.assertEqual("135", document['doc_num'])
    self.assertEqual("0", document['attachment_num'])
    self.assertEqual("794", document['pacer_de_seq_num'])
    self.assertEqual("616260", document['pacer_dm_id'])
    # NOTE(review): the expected filing date is today's date -- presumably
    # the parser falls back to the current date for this entry; confirm.
    self.assertEqual(datetime.date.today().isoformat(),
                     document['date_filed'])
    self.assertEqual(
        "Order on Motion for Bill of Particulars, "
        "Order on Motion for Joinder",
        document['long_desc'])

    #Sometimes the document url case id does not match the court case id
    # In these cases we want to use the parent case number, but also have
    # access to the child casenum
    self.assertEqual("akd", dockets[2].get_court())
    self.assertEqual("4655", dockets[2].get_casenum())
    casemeta = dockets[2].get_casemeta()
    self.assertEqual("USA v. Kott et al", casemeta['case_name'])
    self.assertEqual("3:2007-cr-00056-JWS", casemeta['docket_num'])

    self.assertEqual(1, len(dockets[2].documents))
    document = dockets[2].documents['429-0']
    self.assertEqual("429", document['doc_num'])
    self.assertEqual("0", document['attachment_num'])
    self.assertEqual("1946", document['pacer_de_seq_num'])
    self.assertEqual("606429", document['pacer_dm_id'])
    self.assertEqual("2010-01-13", document['date_filed'])
    self.assertEqual("Order on Motion to Dismiss", document['long_desc'])
    self.assertEqual("4656", document['casenum'])

    # Some dockets have a different linking format from akd.
    # Let's test these out
    dockets = PP.parse_opinions(filebits["nysd.2009"], "nysd")
    # number of entries in the opinions table
    self.assertEqual(5916, len(dockets))

    self.assertEqual("nysd", dockets[0].get_court())
    self.assertEqual("53122", dockets[0].get_casenum())
    casemeta = dockets[0].get_casemeta()
    self.assertEqual("Kingsway Financial v. Pricewaterhouse, et al",
                     casemeta['case_name'])
    self.assertEqual("1:03-cv-05560-RMB-HBP", casemeta['docket_num'])
    self.assertEqual("15:78m(a) Securities Exchange Act",
                     casemeta["case_cause"])
    self.assertEqual("Securities/Commodities", casemeta["nature_of_suit"])

    self.assertEqual(1, len(dockets[0].documents))
    document = dockets[0].documents['380-0']
    self.assertEqual("380", document['doc_num'])
    self.assertEqual("0", document['attachment_num'])
    self.assertEqual("6095482", document['pacer_de_seq_num'])
    self.assertEqual("5453339", document['pacer_dm_id'])
    self.assertEqual("2009-01-05", document['date_filed'])
    self.assertEqual("Memorandum & Opinion", document['long_desc'])

    # Some sanity checks about iquery type opinion pages
    for docket in dockets:
        self.assertEqual(1, len(docket.documents))
        document = list(docket.documents.values())[0]
        # The docket's case number must not exceed its document's.
        casenum_diff = int(docket.get_casenum()) - int(document['casenum'])
        self.assertTrue(casenum_diff <= 0)