def process_recap_docket_history_report(self, pk):
    """Parse an uploaded docket history report and merge it into a docket.

    :param pk: The primary key of the processing queue item you want to work
    on
    :returns: None when the upload cannot be read, is not a valid docket
    history page, or the docket cannot be saved on the final retry. Otherwise
    a dict with the docket's pk ("docket_pk") and a "content_updated" flag.
    """
    started_at = now()
    queue_item = ProcessingQueue.objects.get(pk=pk)
    mark_pq_status(queue_item, "", PROCESSING_STATUS.IN_PROGRESS)
    logger.info(
        "Processing RECAP item (debug is: %s): %s"
        % (queue_item.debug, queue_item)
    )

    try:
        html = queue_item.filepath_local.read().decode("utf-8")
    except IOError as exc:
        read_error = "Internal processing error (%s: %s)." % (
            exc.errno,
            exc.strerror,
        )
        out_of_retries = self.request.retries == self.max_retries
        if out_of_retries or queue_item.debug:
            # Nothing more to try: record the failure and give up.
            mark_pq_status(queue_item, read_error, PROCESSING_STATUS.FAILED)
            return None
        mark_pq_status(
            queue_item, read_error, PROCESSING_STATUS.QUEUED_FOR_RETRY
        )
        raise self.retry(exc=exc)

    parser = DocketHistoryReport(map_cl_to_pacer_id(queue_item.court_id))
    parser._parse_text(html)
    parsed = parser.data
    logger.info("Parsing completed for item %s" % queue_item)

    if parsed == {}:
        # Bad docket history page.
        mark_pq_status(
            queue_item,
            "Not a valid docket history page upload.",
            PROCESSING_STATUS.INVALID_CONTENT,
        )
        self.request.chain = None
        return None

    # Merge the contents of the docket into CL.
    docket, match_count = find_docket_object(
        queue_item.court_id, queue_item.pacer_case_id, parsed["docket_number"]
    )
    if match_count > 1:
        logger.info(
            "Found %s dockets during lookup. Choosing oldest." % match_count
        )
        docket = docket.earliest("date_created")

    docket.add_recap_source()
    update_docket_metadata(docket, parsed)

    if queue_item.debug:
        # Debug uploads stop here: nothing is saved and the chain is dropped.
        mark_pq_successful(queue_item, d_id=docket.pk)
        self.request.chain = None
        return {"docket_pk": docket.pk, "content_updated": False}

    try:
        docket.save()
    except IntegrityError as exc:
        logger.warning(
            "Race condition experienced while attempting docket save."
        )
        save_error = "Unable to save docket due to IntegrityError."
        if self.request.retries != self.max_retries:
            mark_pq_status(
                queue_item, save_error, PROCESSING_STATUS.QUEUED_FOR_RETRY
            )
            raise self.retry(exc=exc)
        mark_pq_status(queue_item, save_error, PROCESSING_STATUS.FAILED)
        self.request.chain = None
        return None

    # Add the HTML to the docket in case we need it someday.
    html_record = PacerHtmlFiles(
        content_object=docket,
        upload_type=UPLOAD_TYPE.DOCKET_HISTORY_REPORT,
    )
    # We only care about the ext w/UUIDFileSystemStorage
    html_record.filepath.save("docket_history.html", ContentFile(html))

    rds_created, content_updated = add_docket_entries(
        docket, parsed["docket_entries"]
    )
    process_orphan_documents(
        rds_created, queue_item.court_id, docket.date_filed
    )
    # Alerts only go out for updated content on a docket that was found by
    # the lookup, and only when the alert was newly enqueued.
    if (
        content_updated
        and match_count > 0
        and enqueue_docket_alert(docket.pk)
    ):
        send_docket_alert(docket.pk, started_at)

    mark_pq_successful(queue_item, d_id=docket.pk)
    return {
        "docket_pk": docket.pk,
        "content_updated": bool(rds_created or content_updated),
    }
def process_recap_docket_history_report(self, pk):
    """Process the docket history report.

    Reads the uploaded HTML for the processing queue item, parses it as a
    docket history report, merges the result into a docket, stores the raw
    HTML alongside the docket and adds the docket entries.

    :param pk: The primary key of the processing queue item you want to work
    on
    :returns: None when the upload does not parse into any docket data or
    when the docket cannot be saved on the final retry. Otherwise a dict with
    the docket's pk ('docket_pk') and a 'needs_solr_update' flag.
    """
    pq = ProcessingQueue.objects.get(pk=pk)
    mark_pq_status(pq, '', pq.PROCESSING_IN_PROGRESS)
    logger.info("Processing RECAP item (debug is: %s): %s" % (pq.debug, pq))

    report = DocketHistoryReport(map_cl_to_pacer_id(pq.court_id))
    # Read raw bytes: decoding the result of a text-mode read fails on
    # Python 3 (str has no .decode) and binary mode is also correct on
    # Python 2.
    with open(pq.filepath_local.path, 'rb') as f:
        text = f.read().decode('utf-8')
    report._parse_text(text)
    data = report.data
    logger.info("Parsing completed for item %s" % pq)

    if not data:
        # Nothing was parsed, so this is not a usable docket history page.
        # Fail the item explicitly instead of raising KeyError below and
        # leaving it marked as in progress.
        msg = "Not a valid docket history page upload."
        mark_pq_status(pq, msg, pq.PROCESSING_FAILED)
        self.request.callbacks = None
        return None

    # Merge the contents of the docket into CL.
    d, count = find_docket_object(pq.court_id, pq.pacer_case_id,
                                  data['docket_number'])
    if count > 1:
        logger.info("Found %s dockets during lookup. Choosing oldest." %
                    count)
        d = d.earliest('date_created')

    add_recap_source(d)
    update_docket_metadata(d, data)

    if pq.debug:
        # Debug uploads stop here: nothing is saved and callbacks are
        # dropped.
        mark_pq_successful(pq, d_id=d.pk)
        self.request.callbacks = None
        return {'docket_pk': d.pk, 'needs_solr_update': False}

    try:
        d.save()
    except IntegrityError as exc:
        logger.warning("Race condition experienced while attempting docket "
                       "save.")
        error_message = "Unable to save docket due to IntegrityError."
        if self.request.retries == self.max_retries:
            mark_pq_status(pq, error_message, pq.PROCESSING_FAILED)
            self.request.callbacks = None
            return None
        mark_pq_status(pq, error_message, pq.QUEUED_FOR_RETRY)
        raise self.retry(exc=exc)

    # Add the HTML to the docket in case we need it someday.
    pacer_file = PacerHtmlFiles(content_object=d,
                                upload_type=DOCKET_HISTORY_REPORT)
    pacer_file.filepath.save(
        'docket_history.html',  # We only care about the ext w/UUIDFileSystemStorage
        ContentFile(text),
    )

    rds_created, needs_solr_update = add_docket_entries(
        d, data['docket_entries'])
    process_orphan_documents(rds_created, pq.court_id, d.date_filed)
    mark_pq_successful(pq, d_id=d.pk)
    return {
        'docket_pk': d.pk,
        'needs_solr_update': bool(rds_created or needs_solr_update),
    }