def get_and_save_free_document_report(self, court_id, start, end, cookies):
    """Download the Free document report and save it to the DB.

    Queries PACER's free opinion report for the date range, then persists one
    PACERFreeDocumentRow per result. Network failures and soft time limits
    trigger a Celery retry until max_retries is exhausted, at which point the
    scrape is marked failed.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param cookies: A requests.cookies.RequestsCookieJar with the cookies of
    a logged-in PACER user.
    :return: A PACERFreeDocumentLog status constant (SCRAPE_SUCCESSFUL or
    SCRAPE_FAILED), or raises Retry to reschedule the task.
    """
    s = PacerSession(cookies=cookies, username=settings.PACER_USERNAME,
                     password=settings.PACER_PASSWORD)
    report = FreeOpinionReport(court_id, s)
    try:
        report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ReadTimeout, ConnectTimeout) as exc:
        # Lazy %-args: let logging build the message only if it's emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=5)
    except SoftTimeLimitExceeded as exc:
        logger.warning("Soft time limit exceeded at %s. %s retries remain.",
                       court_id, (self.max_retries - self.request.retries))
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=5)

    try:
        results = report.data
    except (IndexError, HTTPError) as exc:
        # IndexError: When the page isn't downloaded properly.
        # HTTPError: raise_for_status in parse hit bad status.
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=5)

    for row in results:
        try:
            PACERFreeDocumentRow.objects.create(
                court_id=row.court_id,
                pacer_case_id=row.pacer_case_id,
                docket_number=row.docket_number,
                case_name=row.case_name,
                date_filed=row.date_filed,
                pacer_doc_id=row.pacer_doc_id,
                document_number=row.document_number,
                description=row.description,
                nature_of_suit=row.nature_of_suit,
                cause=row.cause,
            )
        except IntegrityError:
            # Duplicate row (e.g. a re-run over the same date range). Skip it
            # rather than failing the whole scrape; matches the sibling
            # session-based implementation of this task.
            continue
    return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL
def get_and_save_free_document_report(self, court_id, start, end, session):
    """Download the Free document report and save it to the DB.

    Queries PACER's free opinion report for the date range, then persists one
    PACERFreeDocumentRow per result, skipping duplicates. Network failures
    trigger a Celery retry until max_retries is exhausted, at which point the
    scrape is marked failed.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param session: A PACER Session object
    :return: A PACERFreeDocumentLog status constant (SCRAPE_SUCCESSFUL or
    SCRAPE_FAILED), or raises Retry to reschedule the task.
    """
    report = FreeOpinionReport(court_id, session)
    try:
        report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ReadTimeout, ConnectTimeout) as exc:
        # Lazy %-args: let logging build the message only if it's emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=10)

    try:
        results = report.data
    except (IndexError, HTTPError) as exc:
        # IndexError: When the page isn't downloaded properly.
        # HTTPError: raise_for_status in parse hit bad status.
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=10)

    for row in results:
        try:
            PACERFreeDocumentRow.objects.create(
                court_id=row.court_id,
                pacer_case_id=row.pacer_case_id,
                docket_number=row.docket_number,
                case_name=row.case_name,
                date_filed=row.date_filed,
                pacer_doc_id=row.pacer_doc_id,
                document_number=row.document_number,
                description=row.description,
                nature_of_suit=row.nature_of_suit,
                cause=row.cause,
            )
        except IntegrityError:
            # Duplicate for whatever reason.
            continue
    return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL
def get_free_document_report(self, court_id, start, end, session):
    """Get structured results from the PACER free document report.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param session: A PACER Session object
    :return: The report's parsed data, or raises Retry to reschedule the
    task on network or parse failures.
    """
    report = FreeOpinionReport(court_id, session)
    try:
        report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ConnectTimeout, HTTPError) as exc:
        # Lazy %-args: let logging build the message only if it's emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        raise self.retry(exc=exc, countdown=5)

    try:
        return report.data
    except IndexError as exc:
        # Happens when the page isn't downloaded properly, ugh.
        raise self.retry(exc=exc, countdown=15)
def get_and_save_free_document_report(self, court_id, start, end, session):
    """Download the Free document report and save it to the DB.

    Queries PACER's free opinion report for the date range, parses the raw
    responses, then persists one PACERFreeDocumentRow per result, skipping
    duplicates. Network failures trigger a Celery retry until max_retries is
    exhausted, at which point the scrape is marked failed.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param session: A PACER Session object
    :return: A PACERFreeDocumentLog status constant (SCRAPE_SUCCESSFUL or
    SCRAPE_FAILED), or raises Retry to reschedule the task.
    """
    report = FreeOpinionReport(court_id, session)
    try:
        responses = report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ReadTimeout, ConnectTimeout) as exc:
        # Lazy %-args: let logging build the message only if it's emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=10)

    try:
        results = report.parse(responses)
    except (IndexError, HTTPError) as exc:
        # IndexError: When the page isn't downloaded properly.
        # HTTPError: raise_for_status in parse hit bad status.
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=10)

    for row in results:
        try:
            PACERFreeDocumentRow.objects.create(
                court_id=row.court_id,
                pacer_case_id=row.pacer_case_id,
                docket_number=row.docket_number,
                case_name=row.case_name,
                date_filed=row.date_filed,
                pacer_doc_id=row.pacer_doc_id,
                document_number=row.document_number,
                description=row.description,
                nature_of_suit=row.nature_of_suit,
                cause=row.cause,
            )
        except IntegrityError:
            # Duplicate for whatever reason.
            continue
    return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL
def get_free_document_report(self, court_id, start, end, session):
    """Get structured results from the PACER free document report.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param session: A PACER Session object
    :return: The parsed report data, or raises Retry to reschedule the task
    on network or parse failures.
    """
    report = FreeOpinionReport(court_id, session)
    try:
        responses = report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ConnectTimeout) as exc:
        # Lazy %-args: let logging build the message only if it's emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        raise self.retry(exc=exc, countdown=5)

    try:
        return report.parse(responses)
    except IndexError as exc:
        # Happens when the page isn't downloaded properly, ugh.
        raise self.retry(exc=exc, countdown=15)