Example #1
0
    def make_recap_document(self, doc_node, docket_entry, entry_number,
                            attachment_number, document_type, debug):
        """Build or refresh the RECAPDocument described by ``doc_node``.

        The document is looked up by the ``pacer_doc_id`` read from the node.
        When no such document exists, a new unsaved one is started for
        ``docket_entry``. Its fields are then filled in from the node and
        from the arguments.

        :param doc_node: Node handed to the ``self.get_*_from_node`` helpers.
        :param docket_entry: Entry a newly created document is attached to.
        :param entry_number: Document number; a falsy value keeps the
            document's current number.
        :param attachment_number: Stored only when ``document_type`` is
            ``RECAPDocument.ATTACHMENT``; also used to build the file paths.
        :param document_type: Document type; a falsy value keeps the
            document's current type.
        :param debug: When truthy, the document is not saved.
        :return: The populated RECAPDocument.
        """
        doc_id = self.get_str_from_node(doc_node, 'pacer_doc_id')
        try:
            rd = RECAPDocument.objects.get(pacer_doc_id=doc_id)
        except RECAPDocument.DoesNotExist:
            rd = RECAPDocument(pacer_doc_id=doc_id, docket_entry=docket_entry)

        rd.date_upload = self.get_datetime_from_node(doc_node, 'upload_date')
        rd.document_type = (document_type if document_type
                            else rd.document_type)
        rd.document_number = (entry_number if entry_number
                              else rd.document_number)

        # An availability node that cannot be parsed comes back as None;
        # treat that as "not available".
        parsed_availability = self.get_bool_from_node(doc_node, 'available')
        if parsed_availability is None:
            rd.is_available = False
        else:
            rd.is_available = parsed_availability

        rd.sha1 = self.get_str_from_node(doc_node, 'sha1')

        # Only replace the description when the node actually provides one.
        short_description = self.get_str_from_node(doc_node, 'short_desc')
        rd.description = (short_description if short_description
                          else rd.description)

        if rd.is_available:
            rd.filepath_ia = get_ia_document_url_from_path(
                self.path, entry_number, attachment_number)
            rd.filepath_local = os.path.join(
                'recap',
                get_local_document_url_from_path(
                    self.path, entry_number, attachment_number),
            )

        if document_type == RECAPDocument.ATTACHMENT:
            rd.attachment_number = attachment_number

        if debug:
            return rd
        rd.save()
        return rd
Example #2
0
    def make_recap_document(self, doc_node, docket_entry, entry_number,
                            attachment_number, document_type, debug):
        """Make a PACER document.

        The document is looked up by docket entry, document number and
        attachment number. When no such document exists, a new unsaved one is
        started. Its fields are then filled in from ``doc_node`` and from the
        arguments.

        :param doc_node: Node handed to the ``self.get_*_from_node`` helpers.
        :param docket_entry: Entry the document belongs to.
        :param entry_number: Document number used for the lookup, for a newly
            created document, and to build the file paths.
        :param attachment_number: Attachment number; a falsy value is looked
            up as ``None``. Stored on the document only when
            ``document_type`` is ``RECAPDocument.ATTACHMENT``.
        :param document_type: Document type; a falsy value keeps the
            document's current type.
        :param debug: When truthy, the document is not saved.
        :return: The populated RECAPDocument, or ``None`` when saving it
            raised an ``IntegrityError``.
        """
        pacer_document_id = self.get_str_from_node(doc_node, 'pacer_doc_id')
        try:
            rd = RECAPDocument.objects.get(
                docket_entry=docket_entry,
                document_number=entry_number,
                # Use the attachment number if it is not 0, else use None.
                attachment_number=attachment_number or None,
            )
        except RECAPDocument.DoesNotExist:
            rd = RECAPDocument(
                docket_entry=docket_entry,
                pacer_doc_id=pacer_document_id,
                document_number=entry_number,
            )
        else:
            # Keep the stored ID when the node does not provide one.
            rd.pacer_doc_id = pacer_document_id or rd.pacer_doc_id

        rd.date_upload = self.get_datetime_from_node(doc_node, 'upload_date')
        rd.document_type = document_type or rd.document_type

        # If we can't parse the availability node (it returns None), default it
        # to False.
        availability = self.get_bool_from_node(doc_node, 'available')
        rd.is_available = False if availability is None else availability
        rd.sha1 = self.get_str_from_node(doc_node, 'sha1')
        rd.description = (self.get_str_from_node(doc_node, 'short_desc')
                          or rd.description)
        if rd.is_available:
            rd.filepath_ia = get_ia_document_url_from_path(
                self.path, entry_number, attachment_number)
            rd.filepath_local = os.path.join(
                'recap',
                get_local_document_url_from_path(self.path, entry_number,
                                                 attachment_number),
            )
            # Only compute the page count when it has not been set yet.
            if rd.page_count is None:
                extension = rd.filepath_local.path.split('.')[-1]
                rd.page_count = get_page_count(rd.filepath_local.path,
                                               extension)
        if document_type == RECAPDocument.ATTACHMENT:
            rd.attachment_number = attachment_number
        if not debug:
            try:
                rd.save(do_extraction=False, index=False)
            except IntegrityError:
                # This happens when a pacer_doc_id has been wrongly set as
                # the document_number, see for example, document 19 and
                # document 00405193374 here: https://ia802300.us.archive.org/23/items/gov.uscourts.ca4.14-1872/gov.uscourts.ca4.14-1872.docket.xml
                # Pass the values as logging arguments so the message is only
                # formatted if the record is actually emitted.
                logger.error(
                    "Unable to create RECAPDocument for document #%s, "
                    "attachment #%s on entry: %s due to "
                    "IntegrityError.",
                    rd.document_number, rd.attachment_number,
                    rd.docket_entry)
                return None
        return rd
Example #3
0
    def make_recap_document(self, doc_node, docket_entry, entry_number,
                            attachment_number, document_type, debug):
        """Build or refresh the RECAPDocument described by ``doc_node``.

        The document is looked up by docket entry, document number and
        attachment number. When no such document exists, a new unsaved one is
        started. An existing document keeps its availability flag and sha1
        when those are already truthy.

        :param doc_node: Node handed to the ``self.get_*_from_node`` helpers.
        :param docket_entry: Entry the document belongs to.
        :param entry_number: Document number used for the lookup, for a newly
            created document, and to build the file paths.
        :param attachment_number: Attachment number; a falsy value is looked
            up as ``None``. Stored on the document only when
            ``document_type`` is ``RECAPDocument.ATTACHMENT``.
        :param document_type: Document type; a falsy value keeps the
            document's current type.
        :param debug: When truthy, the document is not saved.
        :return: The populated RECAPDocument.
        """
        doc_id = self.get_str_from_node(doc_node, 'pacer_doc_id')
        lookup = {
            'docket_entry': docket_entry,
            'document_number': entry_number,
            # A falsy attachment number (e.g. 0) is looked up as None.
            'attachment_number': (attachment_number if attachment_number
                                  else None),
        }
        try:
            document = RECAPDocument.objects.get(**lookup)
        except RECAPDocument.DoesNotExist:
            document = RECAPDocument(
                docket_entry=docket_entry,
                pacer_doc_id=doc_id,
                document_number=entry_number,
            )
        else:
            # Keep the stored ID when the node does not provide one.
            document.pacer_doc_id = doc_id if doc_id else document.pacer_doc_id

        document.date_upload = self.get_datetime_from_node(
            doc_node, 'upload_date')
        document.document_type = (document_type if document_type
                                  else document.document_type)

        if not document.is_available:
            # An availability node that cannot be parsed comes back as None;
            # treat that as "not available".
            parsed_availability = self.get_bool_from_node(
                doc_node, 'available')
            if parsed_availability is None:
                document.is_available = False
            else:
                document.is_available = parsed_availability

        if not document.sha1:
            document.sha1 = self.get_str_from_node(doc_node, 'sha1')

        short_description = self.get_str_from_node(doc_node, 'short_desc')
        document.description = (short_description if short_description
                                else document.description)

        if document.is_available:
            document.filepath_ia = get_ia_document_url_from_path(
                self.path, entry_number, attachment_number)
            document.filepath_local = os.path.join(
                'recap',
                get_local_document_url_from_path(
                    self.path, entry_number, attachment_number),
            )
            # Only compute the page count when it has not been set yet.
            if document.page_count is None:
                local_file_path = document.filepath_local.path
                extension = local_file_path.rsplit('.', 1)[-1]
                document.page_count = get_page_count(
                    local_file_path, extension)

        if document_type == RECAPDocument.ATTACHMENT:
            document.attachment_number = attachment_number

        if debug:
            return document
        document.save(do_extraction=False, index=False)
        return document
Example #4
0
    def make_recap_document(self, doc_node, docket_entry, entry_number,
                            attachment_number, document_type, debug):
        """Build or refresh the RECAPDocument described by ``doc_node``.

        The document is looked up by the ``pacer_doc_id`` read from the node.
        When no such document exists, a new unsaved one is started for
        ``docket_entry``. Its fields are then filled in from the node and
        from the arguments.

        :param doc_node: Node handed to the ``self.get_*_from_node`` helpers.
        :param docket_entry: Entry a newly created document is attached to.
        :param entry_number: Document number; applied only when it is an
            ``int``. Also used to build the file paths.
        :param attachment_number: Stored only when ``document_type`` is
            ``RECAPDocument.ATTACHMENT``; also used to build the file paths.
        :param document_type: Document type; a falsy value keeps the
            document's current type.
        :param debug: When truthy, the document is not saved.
        :return: The populated RECAPDocument.
        """
        doc_id = self.get_str_from_node(doc_node, 'pacer_doc_id')
        try:
            document = RECAPDocument.objects.get(pacer_doc_id=doc_id)
        except RECAPDocument.DoesNotExist:
            document = RECAPDocument(
                pacer_doc_id=doc_id,
                docket_entry=docket_entry,
            )

        document.date_upload = self.get_datetime_from_node(
            doc_node, 'upload_date')
        document.document_type = (document_type if document_type
                                  else document.document_type)
        if isinstance(entry_number, int):
            document.document_number = entry_number

        # An availability node that cannot be parsed comes back as None;
        # treat that as "not available".
        parsed_availability = self.get_bool_from_node(doc_node, 'available')
        document.is_available = (
            parsed_availability if parsed_availability is not None else False
        )
        document.sha1 = self.get_str_from_node(doc_node, 'sha1')

        # Only replace the description when the node actually provides one.
        short_description = self.get_str_from_node(doc_node, 'short_desc')
        document.description = (short_description if short_description
                                else document.description)

        if document.is_available:
            document.filepath_ia = get_ia_document_url_from_path(
                self.path, entry_number, attachment_number)
            document.filepath_local = os.path.join(
                'recap',
                get_local_document_url_from_path(
                    self.path, entry_number, attachment_number),
            )

        if document_type == RECAPDocument.ATTACHMENT:
            document.attachment_number = attachment_number

        if debug:
            return document
        document.save()
        return document