def parse_recap_docket(self, filename, debug=False):
    """Parse a docket path, creating items or updating existing ones."""
    docket_path = os.path.join(settings.MEDIA_ROOT, 'recap', filename)
    recap_pks = []

    # Guard: if the XML file isn't on disk, there's nothing to parse.
    try:
        pacer_doc = PacerXMLParser(docket_path)
    except IOError:
        logger.warning("Unable to find the docket at: %s" % docket_path)
        return recap_pks

    # Guard: refuse to save dockets that lack the minimum metadata.
    for field in ('case_name', 'date_filed'):
        if not getattr(pacer_doc, field):
            logger.error("Missing required field: %s" % field)
            return recap_pks

    docket = lookup_and_save(pacer_doc, debug=debug)
    if docket is None:
        return recap_pks

    try:
        recap_pks = pacer_doc.make_documents(docket, debug=debug)
    except (IntegrityError, DocketEntry.MultipleObjectsReturned) as exc:
        # Transient collision — back off and let Celery retry later.
        raise self.retry(exc=exc, countdown=20 * 60)
    pacer_doc.make_parties(docket, debug=debug)

    return recap_pks
def parse_items(self):
    """For every item in the directory, send it to Celery for processing.

    Honors ``self.options['start_item']`` by skipping ahead, and
    ``self.options['max_items']`` (``-1`` means no limit) by aborting
    early once that many items have been completed.
    """
    docket_paths = get_docket_list()
    completed = 0
    for docket_path in docket_paths:
        if completed < self.options['start_item'] - 1:
            # Skip ahead if start_item is provided.
            completed += 1
            continue
        logger.info("%s: Parsing docket: %s" % (completed, docket_path))
        pacer_doc = PacerXMLParser(docket_path)

        # Bug fix: the original `continue` sat inside the field-check
        # loop, so it only advanced that inner loop and dockets missing
        # required fields were processed anyway. Collect the missing
        # fields, log each, and skip the docket's processing instead.
        required_fields = ['case_name', 'date_filed']
        missing_fields = [f for f in required_fields
                          if not getattr(pacer_doc, f)]
        for field in missing_fields:
            logger.error("Missing required field: %s" % field)

        if not missing_fields:
            docket = lookup_and_save(pacer_doc, self.debug)
            if docket is not None:
                pacer_doc.make_documents(docket, self.debug)
                pacer_doc.make_parties(docket, self.debug)

        # Still counts toward start_item/max_items accounting even when
        # the docket was skipped, matching the original loop's behavior.
        completed += 1
        max_items = self.options['max_items']
        if completed >= max_items != -1:
            logger.info("\n\nCompleted %s items. Aborting early." %
                        max_items)
            break
def parse_items(self):
    """For every item in the directory, send it to Celery for processing.

    Honors ``self.options['start_item']`` by skipping ahead, and
    ``self.options['max_items']`` (``-1`` means no limit) by aborting
    early once that many items have been completed.
    """
    docket_paths = get_docket_list()
    completed = 0
    for docket_path in docket_paths:
        if completed < self.options['start_item'] - 1:
            # Skip ahead if start_item is provided.
            completed += 1
            continue
        logger.info("%s: Parsing docket: %s" % (completed, docket_path))
        pacer_doc = PacerXMLParser(docket_path)

        # Bug fix: the original `continue` sat inside the field-check
        # loop, so it only advanced that inner loop and dockets missing
        # required fields were processed anyway. Collect the missing
        # fields, log each, and skip the docket's processing instead.
        required_fields = ['case_name', 'date_filed']
        missing_fields = [f for f in required_fields
                          if not getattr(pacer_doc, f)]
        for field in missing_fields:
            logger.error("Missing required field: %s" % field)

        if not missing_fields:
            docket = lookup_and_save(pacer_doc, self.debug)
            if docket is not None:
                pacer_doc.make_documents(docket, self.debug)
                pacer_doc.make_parties(docket, self.debug)

        # Still counts toward start_item/max_items accounting even when
        # the docket was skipped, matching the original loop's behavior.
        completed += 1
        max_items = self.options['max_items']
        if completed >= max_items != -1:
            # Consistency fix: use the module logger instead of print,
            # matching the logging style used throughout this command.
            logger.info("\n\nCompleted %s items. Aborting early." %
                        max_items)
            break
def process_free_opinion_result(self, row_pk, cnt):
    """Process a single result from the free opinion report.

    Loads the ``PACERFreeDocumentRow`` identified by ``row_pk``,
    normalizes its court and case name, then creates or updates the
    matching Docket, DocketEntry, and RECAPDocument inside a single
    transaction.

    :param row_pk: Primary key of the PACERFreeDocumentRow to process.
    :param cnt: Object providing ``make_case_name_short`` (presumably a
        case-name tweaker utility — confirm at the call site).
    :returns: ``None`` when processing stops early (docket lookup
        failure, database error, or the document already exists and is
        available); otherwise a dict carrying the result row, the
        RECAPDocument pk, and the PACER court id for downstream tasks.
    """
    result = PACERFreeDocumentRow.objects.get(pk=row_pk)
    # Translate the PACER court id into CourtListener's Court row.
    result.court = Court.objects.get(pk=map_pacer_to_cl_id(result.court_id))
    result.case_name = harmonize(result.case_name)
    result.case_name_short = cnt.make_case_name_short(result.case_name)
    # Work on a shallow copy so attributes can be stripped before the
    # docket lookup without mutating the row we record errors on.
    row_copy = copy.copy(result)
    # If we don't do this, the doc's date_filed becomes the docket's
    # date_filed. Bad.
    delattr(row_copy, 'date_filed')
    # If we don't do this, we get the PACER court id and it crashes
    delattr(row_copy, 'court_id')
    # If we don't do this, the id of result tries to smash that of the docket.
    delattr(row_copy, 'id')
    try:
        with transaction.atomic():
            docket = lookup_and_save(row_copy)
            if not docket:
                msg = "Unable to create docket for %s" % result
                logger.error(msg)
                result.error_msg = msg
                result.save()
                # NOTE(review): clearing callbacks presumably cancels
                # the rest of a Celery chain — confirm.
                self.request.callbacks = None
                return
            docket.blocked, docket.date_blocked = get_blocked_status(docket)
            docket.save()
            de, de_created = DocketEntry.objects.update_or_create(
                docket=docket,
                entry_number=result.document_number,
                defaults={
                    'date_filed': result.date_filed,
                    'description': result.description,
                })
            rd, rd_created = RECAPDocument.objects.update_or_create(
                docket_entry=de,
                document_number=result.document_number,
                attachment_number=None,
                defaults={
                    'pacer_doc_id': result.pacer_doc_id,
                    'document_type': RECAPDocument.PACER_DOCUMENT,
                    'is_free_on_pacer': True,
                })
    except IntegrityError as e:
        msg = "Raised IntegrityError: %s" % e
        logger.error(msg)
        if self.request.retries == self.max_retries:
            # Out of retries: record the failure on the row and give up.
            result.error_msg = msg
            result.save()
            return
        raise self.retry(exc=e)
    except DatabaseError as e:
        msg = "Unable to complete database transaction:\n%s" % e
        logger.error(msg)
        result.error_msg = msg
        result.save()
        self.request.callbacks = None
        return
    if not rd_created and rd.is_available:
        # The item already exists and is available. Fantastic, mark it as free,
        # and call it a day.
        rd.is_free_on_pacer = True
        rd.save()
        result.delete()
        self.request.callbacks = None
        return
    return {
        'result': result,
        'rd_pk': rd.pk,
        'pacer_court_id': result.court_id
    }
def setUp(self):
    """Build a parser and a saved docket for the tests to exercise."""
    parser = PacerXMLParser(self.DOCKET_PATH)
    self.pacer_doc = parser
    self.docket = lookup_and_save(parser, debug=False)
def process_free_opinion_result(self, row_pk, cnt):
    """Process a single result from the free opinion report.

    Loads the ``PACERFreeDocumentRow`` identified by ``row_pk``,
    normalizes its court and case name, then creates or updates the
    matching Docket, DocketEntry, and RECAPDocument inside a single
    transaction.

    :param row_pk: Primary key of the PACERFreeDocumentRow to process.
    :param cnt: Object providing ``make_case_name_short`` (presumably a
        case-name tweaker utility — confirm at the call site).
    :returns: ``None`` when processing stops early (docket lookup
        failure, database error, or the document already exists and is
        available); otherwise a dict carrying the result row, the
        RECAPDocument pk, and the PACER court id for downstream tasks.
    """
    result = PACERFreeDocumentRow.objects.get(pk=row_pk)
    # Translate the PACER court id into CourtListener's Court row.
    result.court = Court.objects.get(pk=map_pacer_to_cl_id(result.court_id))
    result.case_name = harmonize(result.case_name)
    result.case_name_short = cnt.make_case_name_short(result.case_name)
    # Work on a shallow copy so attributes can be stripped before the
    # docket lookup without mutating the row we record errors on.
    row_copy = copy.copy(result)
    # If we don't do this, the doc's date_filed becomes the docket's
    # date_filed. Bad.
    delattr(row_copy, 'date_filed')
    # If we don't do this, we get the PACER court id and it crashes
    delattr(row_copy, 'court_id')
    # If we don't do this, the id of result tries to smash that of the docket.
    delattr(row_copy, 'id')
    try:
        with transaction.atomic():
            docket = lookup_and_save(row_copy)
            if not docket:
                msg = "Unable to create docket for %s" % result
                logger.error(msg)
                result.error_msg = msg
                result.save()
                # NOTE(review): clearing callbacks presumably cancels
                # the rest of a Celery chain — confirm.
                self.request.callbacks = None
                return
            docket.blocked, docket.date_blocked = get_blocked_status(docket)
            docket.save()
            de, de_created = DocketEntry.objects.update_or_create(
                docket=docket,
                entry_number=result.document_number,
                defaults={
                    'date_filed': result.date_filed,
                    'description': result.description,
                }
            )
            rd, rd_created = RECAPDocument.objects.update_or_create(
                docket_entry=de,
                document_number=result.document_number,
                attachment_number=None,
                defaults={
                    'pacer_doc_id': result.pacer_doc_id,
                    'document_type': RECAPDocument.PACER_DOCUMENT,
                    'is_free_on_pacer': True,
                }
            )
    except IntegrityError as e:
        msg = "Raised IntegrityError: %s" % e
        logger.error(msg)
        if self.request.retries == self.max_retries:
            # Out of retries: record the failure on the row and give up.
            result.error_msg = msg
            result.save()
            return
        raise self.retry(exc=e)
    except DatabaseError as e:
        msg = "Unable to complete database transaction:\n%s" % e
        logger.error(msg)
        result.error_msg = msg
        result.save()
        self.request.callbacks = None
        return
    if not rd_created and rd.is_available:
        # The item already exists and is available. Fantastic, mark it as free,
        # and call it a day.
        rd.is_free_on_pacer = True
        rd.save()
        result.delete()
        self.request.callbacks = None
        return
    return {'result': result, 'rd_pk': rd.pk,
            'pacer_court_id': result.court_id}