def test_make_doc1_url(self):
    """Check that make_doc1_url() builds the expected doc1 URLs.

    Each case pairs the positional arguments for make_doc1_url() with the
    URL it must return. Per the table below, a False third argument leaves
    the document ID untouched, while a True third argument is expected to
    yield an ID whose fourth digit is 1 (an ID that already has a 1 there
    comes back unchanged).
    """
    cases = [
        (('cand', '01712427473', False),
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
        (('cand', '01702427473', False),
         'https://ecf.cand.uscourts.gov/doc1/01702427473'),
        (('cand', '01712427473', True),
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
        (('cand', '01702427473', True),
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
    ]
    for call_args, expected_url in cases:
        actual_url = make_doc1_url(*call_args)
        self.assertEqual(actual_url, expected_url)
def test_make_doc1_url(self):
    """Verify the doc1 URLs produced by make_doc1_url().

    The table lists (arguments, expected URL) pairs. With a False third
    argument the document ID appears in the URL as given; with a True
    third argument the expected URL always carries a 1 as the ID's fourth
    digit.
    """
    base = (
        (('cand', '01712427473', False),
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
        (('cand', '01702427473', False),
         'https://ecf.cand.uscourts.gov/doc1/01702427473'),
        (('cand', '01712427473', True),
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
        (('cand', '01702427473', True),
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
    )
    for (court, doc_id, third_arg), wanted in base:
        self.assertEqual(make_doc1_url(court, doc_id, third_arg), wanted)
def test_auto_login(self, mock_login):
    """Check that a GET on a fresh session triggers a login.

    :param mock_login: Mock whose ``called`` flag is inspected; per the
        assertion message it stands in for PacerSession.login()
        (presumably patched in by a decorator outside this block).
    """
    doc_url = make_doc1_url('ksd', '07902639735', True)

    # Credentials are supplied, but nothing has logged in yet, so the
    # request below should trigger an auto-login.
    self.session.username = PACER_USERNAME
    self.session.password = PACER_PASSWORD

    query = {
        'case_id': '81531',
        'got_receipt': '1',
    }
    # Only the login side effect matters; the response is discarded.
    self.session.get(doc_url, params=query, allow_redirects=True)

    self.assertTrue(mock_login.called,
                    'PacerSession.login() should be called.')
def test_auto_login(self, mock_login):
    """Ensure the session logs in on its own when a request needs it.

    :param mock_login: Mock checked via its ``called`` attribute; the
        assertion message indicates it replaces PacerSession.login()
        (presumably injected by a patch decorator not visible here).
    """
    target = make_doc1_url('ksd', '07902639735', True)
    session = self.session

    # We have not logged in at this point, so issuing the request is what
    # should trigger an auto-login.
    session.username = PACER_USERNAME
    session.password = PACER_PASSWORD
    request_kwargs = {
        'params': {'case_id': '81531', 'got_receipt': '1'},
        'allow_redirects': True,
    }
    session.get(target, **request_kwargs)

    login_was_called = mock_login.called
    self.assertTrue(login_was_called,
                    'PacerSession.login() should be called.')
def download_pdf(self, pacer_case_id, pacer_document_number):
    """Download a PDF from PACER.

    Note that this doesn't support attachments yet.

    The first request sometimes returns the PDF itself and other times an
    HTML page with an iframe containing the PDF. In the second case the
    iframe's src is fetched with a follow-up request.

    :param pacer_case_id: Sent as the ``caseid`` query parameter.
    :param pacer_document_number: Passed, with ``self.court_id``, to
        make_doc1_url() to build the URL that is requested.
    :return: The response for which is_pdf() is true, or None when no PDF
        could be obtained (no iframe found, or the iframe target was not a
        PDF). The failure reason is logged.
    :raises: Whatever ``raise_for_status()`` raises when the first request
        comes back with an error status. The iframe request is not
        status-checked.
    """
    # NOTE(review): presumably (connect, read) timeouts in seconds, as in
    # requests -- confirm against the session implementation.
    timeout = (60, 300)
    url = make_doc1_url(self.court_id, pacer_document_number, True)
    data = {
        'caseid': pacer_case_id,
        'got_receipt': '1',
    }
    logger.info("GETting PDF at URL: %s with params: %s", url, data)
    r = self.session.get(url, params=data, timeout=timeout)
    r.raise_for_status()

    if is_pdf(r):
        # Log the case ID and URL in the order the message names them.
        logger.info('Got PDF binary data for case %s at: %s',
                    pacer_case_id, url)
        return r

    # Not a PDF: treat the body as HTML and look for an iframe pointing at
    # the document. Links are rewritten against the final response URL
    # before the iframe src is read.
    text = clean_html(r.text)
    tree = get_html_parsed_text(text)
    tree.rewrite_links(fix_links_in_lxml_tree, base_href=r.url)
    try:
        iframe_src = tree.xpath('//iframe/@src')[0]
    except IndexError:
        if 'pdf:Producer' in text:
            logger.error("Unable to download PDF. PDF content was placed "
                         "directly in HTML. URL: %s, caseid: %s",
                         url, pacer_case_id)
        else:
            logger.error("Unable to download PDF. PDF not served as binary "
                         "data and unable to find iframe src attribute. "
                         "URL: %s, caseid: %s", url, pacer_case_id)
        return None

    r = self.session.get(iframe_src, timeout=timeout)
    if is_pdf(r):
        logger.info('Got iframed PDF data for case %s at: %s',
                    pacer_case_id, iframe_src)
        return r

    # Previously this path fell off the end of the function silently; keep
    # returning None but leave a trace of why.
    logger.error("Unable to download PDF. iframe src did not return PDF "
                 "data. URL: %s, iframe src: %s, caseid: %s",
                 url, iframe_src, pacer_case_id)
    return None
def test_auto_login(self, mock_login):
    """Confirm that an unauthenticated request makes the session log in.

    :param mock_login: Mock whose ``called`` flag is asserted on; per the
        assertion message it stands in for PacerSession.login()
        (presumably supplied by a patch decorator outside this block).
    """
    doc1_url = make_doc1_url("ksd", "07902639735", True)

    # No login has happened yet, so the GET below should trigger an
    # auto-login using these credentials.
    self.session.username = PACER_USERNAME
    self.session.password = PACER_PASSWORD

    get_params = {"case_id": "81531", "got_receipt": "1"}
    # The response itself is irrelevant to this test.
    self.session.get(doc1_url, params=get_params, allow_redirects=True)

    self.assertTrue(
        mock_login.called, "PacerSession.login() should be called."
    )