def update_docket_info_iquery(self, d_pk):
    """Celery task: refresh a docket's metadata from a PACER iquery report.

    :param self: The bound Celery task.
    :param d_pk: Primary key of the Docket to refresh.
    :return: None. Saves the docket, merges bankruptcy data, and re-indexes
        the docket in Solr as side effects.
    """
    docket = Docket.objects.get(pk=d_pk)

    # Reuse the shared scraper credentials; cookies are cached across tasks.
    cookies = get_or_cache_pacer_cookies(
        "pacer_scraper",
        settings.PACER_USERNAME,
        password=settings.PACER_PASSWORD,
    )
    session = PacerSession(
        cookies=cookies,
        username=settings.PACER_USERNAME,
        password=settings.PACER_PASSWORD,
    )

    report = CaseQuery(map_cl_to_pacer_id(docket.court_id), session)
    try:
        report.query(docket.pacer_case_id)
    except (requests.Timeout, requests.RequestException) as exc:
        logger.warning(
            "Timeout or unknown RequestException on iquery crawl. "
            "Trying again if retries not exceeded."
        )
        # Give up quietly once the retry budget is spent.
        if self.request.retries == self.max_retries:
            return
        raise self.retry(exc=exc)

    docket = update_docket_metadata(docket, report.data)
    docket.save()
    add_bankruptcy_data_to_docket(docket, report.data)
    add_items_to_solr([docket.pk], "search.Docket")
def process_docket_data(d, filepath, report_type):
    """Process docket data file.

    :param d: A docket object to work on.
    :param filepath: The path to a saved HTML file containing docket or
        docket history report data.
    :param report_type: Whether it's a docket or a docket history report.
    :return: The docket's pk on success, or None if the report parsed empty.
    :raises NotImplementedError: If ``report_type`` is not supported.
    """
    from cl.recap.mergers import (
        add_docket_entries,
        add_parties_and_attorneys,
        update_docket_appellate_metadata,
        update_docket_metadata,
        add_bankruptcy_data_to_docket,
        add_claims_to_docket,
    )

    court_id = map_cl_to_pacer_id(d.court_id)
    if report_type == UPLOAD_TYPE.DOCKET:
        report = DocketReport(court_id)
    elif report_type == UPLOAD_TYPE.DOCKET_HISTORY_REPORT:
        report = DocketHistoryReport(court_id)
    elif report_type == UPLOAD_TYPE.APPELLATE_DOCKET:
        report = AppellateDocketReport(court_id)
    elif report_type == UPLOAD_TYPE.IA_XML_FILE:
        report = InternetArchive(court_id)
    elif report_type == UPLOAD_TYPE.CASE_REPORT_PAGE:
        report = CaseQuery(court_id)
    elif report_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        report = ClaimsRegister(court_id)
    else:
        raise NotImplementedError("The report type with id '%s' is not yet "
                                  "supported. Perhaps you need to add it?" %
                                  report_type)

    # Open in text mode with an explicit encoding. Under Python 3 a text-mode
    # read already yields str; calling .decode() on it raises AttributeError.
    with open(filepath, "r", encoding="utf-8") as f:
        text = f.read()
    report._parse_text(text)
    data = report.data
    if data == {}:
        return None

    if report_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        add_bankruptcy_data_to_docket(d, data)
        add_claims_to_docket(d, data["claims"])
    else:
        update_docket_metadata(d, data)
        d, og_info = update_docket_appellate_metadata(d, data)
        if og_info is not None:
            og_info.save()
            d.originating_court_information = og_info
        d.save()
        if data.get("docket_entries"):
            add_docket_entries(d, data["docket_entries"])
    if report_type in (
        UPLOAD_TYPE.DOCKET,
        UPLOAD_TYPE.APPELLATE_DOCKET,
        UPLOAD_TYPE.IA_XML_FILE,
    ):
        add_parties_and_attorneys(d, data["parties"])
    return d.pk
class PacerCaseQueryTest(unittest.TestCase):
    """A test of basic info for the Case Query"""

    def setUp(self):
        self.session = get_pacer_session()
        self.session.login()
        self.report = CaseQuery("cand", self.session)
        self.pacer_case_id = "186730"  # 4:06-cv-07294 Foley v. Bates

    @SKIP_IF_NO_PACER_LOGIN
    def test_query(self):
        """Can we get the basic info?"""
        self.report.query(self.pacer_case_id)
        self.assertIn(
            "Foley v. Bates",
            self.report.response.text,
            msg="Super basic query failed",
        )

        # Look the parsed metadata up once instead of re-parsing the response
        # for every assertion (the `metadata` local was previously unused).
        metadata = self.report.metadata
        self.assertIn(
            "Foley v. Bates et al",
            metadata["case_name_raw"],
            msg="case_name_raw query failed",
        )
        self.assertEqual(
            date(2007, 11, 29),
            metadata["date_last_filing"],
            msg="date_last_filing query failed",
        )
        self.assertEqual(
            date(2007, 5, 7),
            metadata["date_terminated"],
            msg="date_terminated query failed",
        )
        self.assertEqual(
            date(2006, 11, 27),
            metadata["date_filed"],
            msg="date_filed query failed",
        )
def update_docket_info_iquery(self, d_pk: int, court_id: str) -> None:
    """Update the docket info from iquery

    :param self: The Celery task
    :param d_pk: The ID of the docket
    :param court_id: The court of the docket. Needed for throttling by court.
    :return: None
    """
    # Shared scraper credentials; cookies are cached between task runs.
    cookies = get_or_cache_pacer_cookies(
        "pacer_scraper",
        settings.PACER_USERNAME,
        password=settings.PACER_PASSWORD,
    )
    session = PacerSession(
        cookies=cookies,
        username=settings.PACER_USERNAME,
        password=settings.PACER_PASSWORD,
    )

    # The court_id filter doubles as a sanity check that the docket belongs
    # to the court this task was throttled under.
    docket = Docket.objects.get(pk=d_pk, court_id=court_id)
    report = CaseQuery(map_cl_to_pacer_id(docket.court_id), session)
    try:
        report.query(docket.pacer_case_id)
    except (requests.Timeout, requests.RequestException) as exc:
        logger.warning(
            "Timeout or unknown RequestException on iquery crawl. "
            "Trying again if retries not exceeded."
        )
        if self.request.retries == self.max_retries:
            # Retry budget exhausted; give up quietly.
            return
        raise self.retry(exc=exc)

    # Nothing parsed from the page; there is nothing to merge.
    if not report.data:
        return

    save_iquery_to_docket(
        self,
        report.data,
        docket,
        tag_names=None,
        add_to_solr=True,
    )
def process_docket_data(d, filepath, report_type):
    """Process docket data file.

    :param d: A docket object to work on.
    :param filepath: The path to a saved HTML file containing docket or
        docket history report data.
    :param report_type: Whether it's a docket or a docket history report.
    :return: The docket's pk on success, or None if the report parsed empty.
    :raises NotImplementedError: If ``report_type`` is not supported.
    """
    from cl.recap.tasks import update_docket_metadata, add_docket_entries, \
        add_parties_and_attorneys, update_docket_appellate_metadata

    # Resolve the court ID once rather than in every branch.
    court_id = map_cl_to_pacer_id(d.court_id)
    if report_type == UPLOAD_TYPE.DOCKET:
        report = DocketReport(court_id)
    elif report_type == UPLOAD_TYPE.DOCKET_HISTORY_REPORT:
        report = DocketHistoryReport(court_id)
    elif report_type == UPLOAD_TYPE.APPELLATE_DOCKET:
        report = AppellateDocketReport(court_id)
    elif report_type == UPLOAD_TYPE.IA_XML_FILE:
        report = InternetArchive()
    elif report_type == UPLOAD_TYPE.CASE_REPORT_PAGE:
        report = CaseQuery(court_id)
    else:
        # Previously an unknown type fell through and crashed later with
        # UnboundLocalError on `report`; fail loudly and clearly instead.
        raise NotImplementedError("The report type with id '%s' is not yet "
                                  "supported. Perhaps you need to add it?" %
                                  report_type)

    # Open in text mode with an explicit encoding. Under Python 3 a text-mode
    # read already yields str; calling .decode() on it raises AttributeError.
    with open(filepath, 'r', encoding='utf-8') as f:
        text = f.read()
    report._parse_text(text)
    data = report.data
    if data == {}:
        return None

    update_docket_metadata(d, data)
    d, og_info = update_docket_appellate_metadata(d, data)
    if og_info is not None:
        og_info.save()
        d.originating_court_information = og_info
    d.save()
    if data.get('docket_entries'):
        add_docket_entries(d, data['docket_entries'])
    if report_type in (UPLOAD_TYPE.DOCKET, UPLOAD_TYPE.APPELLATE_DOCKET,
                       UPLOAD_TYPE.IA_XML_FILE):
        add_parties_and_attorneys(d, data['parties'])
    return d.pk
def do_case_query_by_pacer_case_id(self, data, court_id, cookies,
                                   tag_names=None):
    """Run a case query (iquery.pl) query on a case and save the data

    :param data: A dict containing at least the following: {
        'pacer_case_id': The internal pacer case ID for the item.
    }
    :param court_id: A courtlistener court ID
    :param cookies: A requests.cookies.RequestsCookieJar with the cookies of
        a logged-in PACER user.
    :param tag_names: A list of tag names to associate with the docket when
        saving it in the DB.
    :return: A dict with the pacer_case_id and docket_pk values.
    """
    s = PacerSession(cookies=cookies)
    if data is None:
        logger.info("Empty data argument. Terminating "
                    "chains and exiting.")
        self.request.callbacks = None
        return

    pacer_case_id = data.get('pacer_case_id')
    report = CaseQuery(map_cl_to_pacer_id(court_id), s)
    # Use lazy %-args (consistent with the "No valid docket data" call below)
    # instead of eager string formatting.
    logger.info("Querying docket report %s.%s", court_id, pacer_case_id)
    # Both lookup failures mean the same thing here: we can't identify a
    # single existing docket, so treat it as missing and resolve it below.
    try:
        d = Docket.objects.get(
            pacer_case_id=pacer_case_id,
            court_id=court_id,
        )
    except (Docket.DoesNotExist, Docket.MultipleObjectsReturned):
        d = None

    report.query(pacer_case_id)
    docket_data = report.data
    logger.info("Querying and parsing complete for %s.%s",
                court_id, pacer_case_id)

    if not docket_data:
        logger.info("No valid docket data for %s.%s",
                    court_id, pacer_case_id)
        self.request.callbacks = None
        return

    # Merge the contents into CL.
    if d is None:
        d, count = find_docket_object(court_id, pacer_case_id,
                                      docket_data['docket_number'])
        if count > 1:
            # Multiple candidate dockets; prefer the oldest one.
            d = d.earliest('date_created')

    add_recap_source(d)
    update_docket_metadata(d, docket_data)
    d.save()

    tags = []
    if tag_names is not None:
        for tag_name in tag_names:
            tag, _ = Tag.objects.get_or_create(name=tag_name)
            tag.tag_object(d)
            tags.append(tag)

    # Add the HTML to the docket in case we need it someday.
    pacer_file = PacerHtmlFiles(content_object=d,
                                upload_type=UPLOAD_TYPE.CASE_REPORT_PAGE)
    pacer_file.filepath.save(
        'case_report.html',  # We only care about the ext w/UUIDFileSystemStorage
        ContentFile(report.response.text),
    )

    logger.info("Created/updated docket: %s", d)
    return {
        'pacer_case_id': pacer_case_id,
        'docket_pk': d.pk,
    }
def setUp(self):
    """Log into PACER and prepare a CaseQuery report for the tests."""
    session = get_pacer_session()
    session.login()
    self.session = session
    self.report = CaseQuery("cand", session)
    # 4:06-cv-07294 Foley v. Bates
    self.pacer_case_id = "186730"
def process_docket_data(
    d: Docket,
    report_type: int,
    filepath: Optional[str] = None,
) -> Optional[int]:
    """Process docket data file.

    :param d: A docket object to work on.
    :param report_type: Whether it's a docket or a docket history report.
    :param filepath: A local path where the item can be found. If not
        provided, the filepath_local field of the docket object will be
        attempted.
    :return: The docket's pk on success, or None if the report parsed empty.
    :raises NotImplementedError: If ``report_type`` is not supported.
    """
    from cl.recap.mergers import (
        add_bankruptcy_data_to_docket,
        add_claims_to_docket,
        add_docket_entries,
        add_parties_and_attorneys,
        update_docket_appellate_metadata,
        update_docket_metadata,
    )

    court_id = map_cl_to_pacer_id(d.court_id)
    if report_type == UPLOAD_TYPE.DOCKET:
        report = DocketReport(court_id)
    elif report_type == UPLOAD_TYPE.DOCKET_HISTORY_REPORT:
        report = DocketHistoryReport(court_id)
    elif report_type == UPLOAD_TYPE.APPELLATE_DOCKET:
        report = AppellateDocketReport(court_id)
    elif report_type == UPLOAD_TYPE.IA_XML_FILE:
        report = InternetArchive(court_id)
    elif report_type == UPLOAD_TYPE.CASE_REPORT_PAGE:
        report = CaseQuery(court_id)
    elif report_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        report = ClaimsRegister(court_id)
    else:
        raise NotImplementedError(
            "The report type with id '%s' is not yet "
            "supported. Perhaps you need to add it?" % report_type
        )

    if filepath:
        with open(filepath, "r") as f:
            text = f.read()
    else:
        # This is an S3 path, so get it remotely.
        text = d.filepath_local.read().decode()

    report._parse_text(text)
    data = report.data
    if data == {}:
        return None

    if report_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        add_bankruptcy_data_to_docket(d, data)
        add_claims_to_docket(d, data["claims"])
    else:
        update_docket_metadata(d, data)
        d, og_info = update_docket_appellate_metadata(d, data)
        if og_info is not None:
            og_info.save()
            d.originating_court_information = og_info
        d.save()
        if data.get("docket_entries"):
            add_docket_entries(d, data["docket_entries"])
    if report_type in (
        UPLOAD_TYPE.DOCKET,
        UPLOAD_TYPE.APPELLATE_DOCKET,
        UPLOAD_TYPE.IA_XML_FILE,
    ):
        add_parties_and_attorneys(d, data["parties"])
    return d.pk
def test_scraper_has_session_attribute(self):
    """The CaseQuery report should expose the session it was built with."""
    report = CaseQuery("cand", PacerSession())
    # hasattr swallows the AttributeError the original caught explicitly.
    if not hasattr(report, "session"):
        self.fail("Did not have session attribute on CaseQuery object.")