def test_make_doc1_url(self):
    """Check that make_doc1_url builds the expected doc1 URLs.

    Each case lists the three positional arguments handed to
    make_doc1_url followed by the URL it should produce. In the cases
    below, a False third argument leaves the document ID untouched, while
    a True third argument yields an ID whose fourth digit is '1'.
    """
    cases = [
        ('cand', '01712427473', False,
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
        ('cand', '01702427473', False,
         'https://ecf.cand.uscourts.gov/doc1/01702427473'),
        ('cand', '01712427473', True,
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
        ('cand', '01702427473', True,
         'https://ecf.cand.uscourts.gov/doc1/01712427473'),
    ]
    for court_id, doc_id, flag, expected_url in cases:
        actual_url = make_doc1_url(court_id, doc_id, flag)
        self.assertEqual(actual_url, expected_url)
Beispiel #2
0
 def test_make_doc1_url(self):
     """Check that make_doc1_url builds the expected doc1 URLs.

     Each case lists the three positional arguments handed to
     make_doc1_url followed by the URL it should produce. In the cases
     below, a False third argument leaves the document ID untouched, while
     a True third argument yields an ID whose fourth digit is '1'.
     """
     cases = [
         ('cand', '01712427473', False,
          'https://ecf.cand.uscourts.gov/doc1/01712427473'),
         ('cand', '01702427473', False,
          'https://ecf.cand.uscourts.gov/doc1/01702427473'),
         ('cand', '01712427473', True,
          'https://ecf.cand.uscourts.gov/doc1/01712427473'),
         ('cand', '01702427473', True,
          'https://ecf.cand.uscourts.gov/doc1/01712427473'),
     ]
     for court_id, doc_id, flag, expected_url in cases:
         actual_url = make_doc1_url(court_id, doc_id, flag)
         self.assertEqual(actual_url, expected_url)
Beispiel #3
0
 def test_auto_login(self, mock_login):
     """Verify that a GET made before logging in calls the login mock."""
     doc_url = make_doc1_url('ksd', '07902639735', True)
     query = {
         'case_id': '81531',
         'got_receipt': '1',
     }

     # Credentials are set, but no login has happened yet, so the request
     # below is expected to trigger an auto-login.
     self.session.username = PACER_USERNAME
     self.session.password = PACER_PASSWORD
     self.session.get(doc_url, params=query, allow_redirects=True)

     login_was_called = mock_login.called
     self.assertTrue(login_was_called,
                     'PacerSession.login() should be called.')
Beispiel #4
0
 def test_auto_login(self, mock_login):
     """Verify that a GET made before logging in calls the login mock."""
     doc_url = make_doc1_url('ksd', '07902639735', True)
     query = {
         'case_id': '81531',
         'got_receipt': '1',
     }

     # Credentials are set, but no login has happened yet, so the request
     # below is expected to trigger an auto-login.
     self.session.username = PACER_USERNAME
     self.session.password = PACER_PASSWORD
     self.session.get(doc_url, params=query, allow_redirects=True)

     login_was_called = mock_login.called
     self.assertTrue(login_was_called,
                     'PacerSession.login() should be called.')
    def download_pdf(self, pacer_case_id, pacer_document_number):
        """Download a PDF from PACER.

        Note that this doesn't support attachments yet.

        The doc1 URL is requested first. If that response is a PDF it is
        returned directly; otherwise the response is parsed as HTML and the
        ``src`` of the first iframe found is requested instead.

        :param pacer_case_id: Sent as the ``caseid`` query parameter of the
            first request.
        :param pacer_document_number: Passed, with ``self.court_id``, to
            ``make_doc1_url`` to build the URL of the first request.
        :return: The response of the first request when it is a PDF; ``None``
            when it is not a PDF and no iframe ``src`` can be found in it;
            otherwise the response of the iframe request. The iframe response
            is returned as-is, whether or not it turns out to be a PDF.
        :raises: Whatever ``raise_for_status()`` raises for the first
            response. The iframe response is not status-checked.
        """
        # NOTE(review): presumably (connect, read) timeouts in seconds, as
        # with requests -- confirm against the session implementation.
        timeout = (60, 300)
        url = make_doc1_url(self.court_id, pacer_document_number, True)
        data = {
            'caseid': pacer_case_id,
            'got_receipt': '1',
        }

        logger.info("GETting PDF at URL: %s with params: %s", url, data)
        r = self.session.get(url, params=data, timeout=timeout)

        # The request above sometimes generates an HTML page with an iframe
        # containing the PDF, and other times returns the PDF. Our task is thus
        # to either get the src of the iframe and download the PDF or just
        # return the pdf.
        r.raise_for_status()
        if is_pdf(r):
            # Log the case ID and the URL fetched; the message previously
            # received the URL and the params dict in those slots.
            logger.info('Got PDF binary data for case %s at: %s',
                        pacer_case_id, url)
            return r

        text = clean_html(r.text)
        tree = get_html_parsed_text(text)
        tree.rewrite_links(fix_links_in_lxml_tree, base_href=r.url)
        try:
            iframe_src = tree.xpath('//iframe/@src')[0]
        except IndexError:
            if 'pdf:Producer' in text:
                logger.error("Unable to download PDF. PDF content was placed "
                             "directly in HTML. URL: %s, caseid: %s",
                             url, pacer_case_id)
            else:
                logger.error(
                    "Unable to download PDF. PDF not served as binary "
                    "data and unable to find iframe src attribute. "
                    "URL: %s, caseid: %s", url, pacer_case_id)
            return None

        r = self.session.get(iframe_src, timeout=timeout)
        if is_pdf(r):
            # As above: report the case ID, not the doc1 URL, as the case.
            logger.info('Got iframed PDF data for case %s at: %s',
                        pacer_case_id, iframe_src)

        return r
Beispiel #6
0
 def test_auto_login(self, mock_login):
     """Verify that a GET made before logging in calls the login mock."""
     doc_url = make_doc1_url("ksd", "07902639735", True)
     query = {
         "case_id": "81531",
         "got_receipt": "1",
     }

     # Credentials are set, but no login has happened yet, so the request
     # below is expected to trigger an auto-login.
     self.session.username = PACER_USERNAME
     self.session.password = PACER_PASSWORD
     self.session.get(doc_url, params=query, allow_redirects=True)

     login_was_called = mock_login.called
     self.assertTrue(login_was_called,
                     "PacerSession.login() should be called.")