def process_recap_attachment(self, pk, tag_names=None):
    """Process an uploaded attachment page from the RECAP API endpoint.

    :param pk: The primary key of the processing queue item you want to
    work on.
    :param tag_names: A list of tag names to add to all items created or
    modified in this function.
    :return: Tuple indicating the status of the processing and a related
    message.
    """
    pq = ProcessingQueue.objects.get(pk=pk)
    mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS)
    logger.info("Processing RECAP item (debug is: %s): %s" % (pq.debug, pq))

    # Open in binary mode and decode explicitly; calling .decode() on the
    # str returned by a text-mode read fails under Python 3.
    with open(pq.filepath_local.path, "rb") as f:
        text = f.read().decode("utf-8")

    att_data = get_data_from_att_report(text, pq.court_id)
    logger.info("Parsing completed for item %s" % pq)

    if att_data == {}:
        # Bad attachment page. Abort the rest of the task chain.
        msg = "Not a valid attachment page upload."
        self.request.chain = None
        return mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT)

    if pq.pacer_case_id in ["undefined", "null"]:
        # Bad data from the client. Fix it with the parsed data.
        pq.pacer_case_id = att_data.get("pacer_case_id")
        pq.save()

    try:
        rds_affected, de = merge_attachment_page_data(
            pq.court,
            pq.pacer_case_id,
            att_data["pacer_doc_id"],
            att_data["document_number"],
            text,
            att_data["attachments"],
            pq.debug,
        )
    except RECAPDocument.MultipleObjectsReturned:
        msg = (
            "Too many documents found when attempting to associate "
            "attachment data"
        )
        return mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED)
    except RECAPDocument.DoesNotExist as exc:
        msg = "Could not find docket to associate with attachment metadata"
        if (self.request.retries == self.max_retries) or pq.debug:
            return mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED)
        mark_pq_status(pq, msg, PROCESSING_STATUS.QUEUED_FOR_RETRY)
        raise self.retry(exc=exc)

    add_tags_to_objs(tag_names, rds_affected)
    return mark_pq_successful(pq, d_id=de.docket_id, de_id=de.pk)
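# A minimal usage sketch, not part of the original module: it assumes
# process_recap_attachment is registered as a bound Celery task (e.g.
# @app.task(bind=True, max_retries=...)), which its use of self.request
# and self.retry implies. The helper name and the tag value below are
# hypothetical.
def _example_enqueue_attachment_processing(pq_pk):
    """Enqueue attachment-page processing for an existing ProcessingQueue
    row, tagging every item it creates or modifies. Illustrative only."""
    process_recap_attachment.delay(pq_pk, tag_names=["example-tag"])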
def fetch_attachment_page(self, fq_pk):
    """Fetch a PACER attachment page for the RECAPDocument tied to a
    fetch queue item.

    This is very similar to process_recap_attachment, except that it
    manages status as it proceeds and it gets the cookie info from Redis.

    :param fq_pk: The PK of the PacerFetchQueue item to update.
    :return: None
    """
    fq = PacerFetchQueue.objects.get(pk=fq_pk)
    mark_fq_status(fq, "", PROCESSING_STATUS.IN_PROGRESS)
    rd = fq.recap_document
    if not rd.pacer_doc_id:
        msg = (
            "Unable to get attachment page: Unknown pacer_doc_id for "
            "RECAP Document object %s" % rd.pk
        )
        mark_fq_status(fq, msg, PROCESSING_STATUS.NEEDS_INFO)
        return

    cookies = get_pacer_cookie_from_cache(fq.user_id)
    if not cookies:
        msg = "Unable to find cached cookies. Aborting request."
        mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED)
        return

    try:
        r = get_attachment_page_by_rd(rd.pk, cookies)
    except (requests.RequestException, HTTPError):
        msg = "Failed to get attachment page from network."
        mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED)
        return

    text = r.response.text
    att_data = get_data_from_att_report(text, rd.docket_entry.docket.court_id)
    if att_data == {}:
        # Unlike process_recap_attachment, nothing was uploaded here, so
        # the message refers to the fetched page.
        msg = "Not a valid attachment page."
        mark_fq_status(fq, msg, PROCESSING_STATUS.INVALID_CONTENT)
        return

    try:
        merge_attachment_page_data(
            rd.docket_entry.docket.court,
            rd.docket_entry.docket.pacer_case_id,
            att_data["pacer_doc_id"],
            att_data["document_number"],
            text,
            att_data["attachments"],
        )
    except RECAPDocument.MultipleObjectsReturned:
        msg = (
            "Too many documents found when attempting to associate "
            "attachment data"
        )
        mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED)
        return
    except RECAPDocument.DoesNotExist as exc:
        msg = "Could not find docket to associate with attachment metadata"
        if self.request.retries == self.max_retries:
            mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED)
            return
        mark_fq_status(fq, msg, PROCESSING_STATUS.QUEUED_FOR_RETRY)
        raise self.retry(exc=exc)

    msg = "Successfully completed fetch and save."
    mark_fq_status(fq, msg, PROCESSING_STATUS.SUCCESSFUL)
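# A minimal usage sketch, not part of the original module: it assumes
# fetch_attachment_page is likewise a bound Celery task, and that the
# user tied to the PacerFetchQueue row has PACER cookies cached in Redis
# (see get_pacer_cookie_from_cache above). The helper name is
# hypothetical.
def _example_enqueue_attachment_fetch(fq_pk):
    """Kick off a network fetch of the attachment page for a
    PacerFetchQueue row. Illustrative only."""
    fetch_attachment_page.delay(fq_pk)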