コード例 #1
0
ファイル: tasks.py プロジェクト: litewarp/courtlistener
def process_recap_attachment(self, pk, tag_names=None):
    """Process an uploaded attachment page from the RECAP API endpoint.

    The uploaded file is read from disk, parsed with
    get_data_from_att_report, and merged via merge_attachment_page_data.
    The processing queue item's status is updated for every outcome.

    :param pk: The primary key of the processing queue item you want to work on
    :param tag_names: A list of tag names to add to all items created or
    modified in this function.
    :return: Tuple indicating the status of the processing and a related
    message
    :raises: Whatever self.retry(exc=...) produces when no matching
    RECAPDocument exists, retries remain, and the item is not in debug mode.
    """
    pq = ProcessingQueue.objects.get(pk=pk)
    mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS)
    logger.info("Processing RECAP item (debug is: %s): %s", pq.debug, pq)

    # Read raw bytes and decode explicitly. A text-mode read returns str on
    # Python 3, which has no .decode(), and would otherwise rely on the
    # platform's default encoding.
    with open(pq.filepath_local.path, "rb") as f:
        text = f.read().decode("utf-8")
    att_data = get_data_from_att_report(text, pq.court_id)
    logger.info("Parsing completed for item %s", pq)

    if att_data == {}:
        # Bad attachment page.
        msg = "Not a valid attachment page upload."
        # NOTE(review): presumably this stops any tasks chained after this
        # one from running on invalid content -- confirm against the caller.
        self.request.chain = None
        return mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT)

    if pq.pacer_case_id in ["undefined", "null"]:
        # Bad data from the client. Fix it with parsed data.
        pq.pacer_case_id = att_data.get("pacer_case_id")
        pq.save()

    try:
        rds_affected, de = merge_attachment_page_data(
            pq.court,
            pq.pacer_case_id,
            att_data["pacer_doc_id"],
            att_data["document_number"],
            text,
            att_data["attachments"],
            pq.debug,
        )
    except RECAPDocument.MultipleObjectsReturned:
        # Ambiguous match: retrying cannot help, so fail immediately.
        msg = ("Too many documents found when attempting to associate "
               "attachment data")
        return mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED)
    except RECAPDocument.DoesNotExist as exc:
        msg = "Could not find docket to associate with attachment metadata"
        # Give up once retries are exhausted, or right away in debug mode;
        # otherwise record the retry state and re-queue the task.
        if (self.request.retries == self.max_retries) or pq.debug:
            return mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED)
        else:
            mark_pq_status(pq, msg, PROCESSING_STATUS.QUEUED_FOR_RETRY)
            raise self.retry(exc=exc)

    add_tags_to_objs(tag_names, rds_affected)
    return mark_pq_successful(pq, d_id=de.docket_id, de_id=de.pk)
コード例 #2
0
def fetch_attachment_page(self, fq_pk):
    """Download a PACER attachment page for a fetch queue item and merge it.

    This closely mirrors process_recap_attachment, except that the page is
    fetched over the network using cookies looked up from the cache, and the
    outcome is recorded on the fetch queue item at each step rather than
    being returned.

    :param fq_pk: The PK of the RECAP Fetch Queue to update.
    :return: None
    """
    queue_item = PacerFetchQueue.objects.get(pk=fq_pk)
    mark_fq_status(queue_item, "", PROCESSING_STATUS.IN_PROGRESS)

    # Without a pacer_doc_id there is nothing to request.
    recap_doc = queue_item.recap_document
    if not recap_doc.pacer_doc_id:
        mark_fq_status(
            queue_item,
            "Unable to get attachment page: Unknown pacer_doc_id for "
            "RECAP Document object %s" % recap_doc.pk,
            PROCESSING_STATUS.NEEDS_INFO,
        )
        return

    # The request needs the user's cached cookies.
    session_cookies = get_pacer_cookie_from_cache(queue_item.user_id)
    if not session_cookies:
        mark_fq_status(
            queue_item,
            "Unable to find cached cookies. Aborting request.",
            PROCESSING_STATUS.FAILED,
        )
        return

    try:
        report = get_attachment_page_by_rd(recap_doc.pk, session_cookies)
    except (requests.RequestException, HTTPError):
        mark_fq_status(
            queue_item,
            "Failed to get attachment page from network.",
            PROCESSING_STATUS.FAILED,
        )
        return

    page_html = report.response.text
    parsed = get_data_from_att_report(
        page_html, recap_doc.docket_entry.docket.court_id
    )

    # An empty dict from the parser is treated as an invalid page.
    if parsed == {}:
        mark_fq_status(
            queue_item,
            "Not a valid attachment page upload",
            PROCESSING_STATUS.INVALID_CONTENT,
        )
        return

    try:
        merge_attachment_page_data(
            recap_doc.docket_entry.docket.court,
            recap_doc.docket_entry.docket.pacer_case_id,
            parsed["pacer_doc_id"],
            parsed["document_number"],
            page_html,
            parsed["attachments"],
        )
    except RECAPDocument.MultipleObjectsReturned:
        mark_fq_status(
            queue_item,
            "Too many documents found when attempting to associate "
            "attachment data",
            PROCESSING_STATUS.FAILED,
        )
        return
    except RECAPDocument.DoesNotExist as exc:
        not_found = (
            "Could not find docket to associate with attachment metadata"
        )
        # While retries remain, flag the item and re-queue the task;
        # otherwise record the failure and stop.
        if self.request.retries != self.max_retries:
            mark_fq_status(
                queue_item, not_found, PROCESSING_STATUS.QUEUED_FOR_RETRY
            )
            raise self.retry(exc=exc)
        mark_fq_status(queue_item, not_found, PROCESSING_STATUS.FAILED)
        return
    else:
        mark_fq_status(
            queue_item,
            "Successfully completed fetch and save.",
            PROCESSING_STATUS.SUCCESSFUL,
        )