Exemple #1
0
def get_or_make_docket_entry(d, docket_entry):
    """Find the docket entry matching a scraped one, making it if needed.

    Numbered entries are fetched or created by docket and entry number.
    Unnumbered entries are matched on the docket, the filing date and either
    the entry's own description or the description of one of its related
    recap_documents.

    :param d: The docket we expect to find it in.
    :param docket_entry: The scraped dict from Juriscraper for the docket
    entry.
    :return: Tuple of (de, de_created) or None, where:
     - de is the DocketEntry object
     - de_created is a boolean stating whether de was created or not
     - None is returned when a numbered entry matches more than one row.
    """
    if docket_entry["document_number"]:
        try:
            # get_or_create already hands back the (object, created) pair.
            return DocketEntry.objects.get_or_create(
                docket=d,
                entry_number=docket_entry["document_number"],
            )
        except DocketEntry.MultipleObjectsReturned:
            logger.error(
                "Multiple docket entries found for document "
                "entry number '%s' while processing '%s'",
                docket_entry["document_number"],
                d,
            )
            return None

    # Unnumbered entry. The only thing we can be sure we have is a
    # date. Try to find it by date and description (short or long)
    normalize_long_description(docket_entry)

    lookup = Q()
    long_description = docket_entry.get("description")
    if long_description:
        lookup |= Q(description=long_description)
    short_description = docket_entry.get("short_description")
    if short_description:
        lookup |= Q(recap_documents__description=short_description)

    candidates = DocketEntry.objects.filter(
        lookup,
        docket=d,
        date_filed=docket_entry["date_filed"],
        entry_number=docket_entry["document_number"],
    )
    match_count = candidates.count()

    if match_count == 0:
        # Nothing matched: build a new entry. It is only instantiated here;
        # this function does not call save() on it.
        fresh_entry = DocketEntry(
            docket=d,
            entry_number=docket_entry["document_number"],
        )
        return fresh_entry, True

    if match_count == 1:
        return candidates[0], False

    logger.warning(
        "Multiple docket entries returned for unnumbered docket "
        "entry on date: %s while processing %s. Attempting merge",
        docket_entry["date_filed"],
        d,
    )
    # There's so little metadata with unnumbered des that if there's
    # more than one match, we can just select the oldest as canonical.
    return merge_unnumbered_docket_entries(candidates), False
Exemple #2
0
    def make_documents(self, docket, debug):
        """Parse through the document nodes, making good objects.

        For every node, create a line item on the Docket (a DocketEntry), and
        create 1..n additional RECAPDocuments (attachments or regular documents)
        that are associated with that DocketEntry.

        :param docket: The docket whose entries are looked up or created.
        :param debug: When truthy, docket entries are not saved. The flag is
        also passed through to make_recap_document.
        :return: None. An attachment node whose DocketEntry does not exist is
        logged and skipped.
        """
        for doc_node in self.document_list:
            # Make a DocketEntry object
            entry_number = int(doc_node.xpath('@doc_num')[0])
            attachment_number = int(doc_node.xpath('@attachment_num')[0])
            # Parenthesized single-argument form: prints the same text under
            # Python 2 and is also valid syntax under Python 3.
            print("Working on document %s, attachment %s" % (
                entry_number, attachment_number))

            # Attachment number 0 denotes the main document of the entry.
            if attachment_number == 0:
                document_type = RECAPDocument.PACER_DOCUMENT
            else:
                document_type = RECAPDocument.ATTACHMENT
            is_main_document = document_type == RECAPDocument.PACER_DOCUMENT

            try:
                docket_entry = DocketEntry.objects.get(
                    docket=docket,
                    entry_number=entry_number,
                )
            except DocketEntry.DoesNotExist:
                if not is_main_document:
                    # An attachment needs an existing entry to hang off of.
                    logger.error(
                        "Tried to create attachment without a DocketEntry "
                        "object to associate it with.")
                    continue
                docket_entry = DocketEntry(
                    docket=docket,
                    entry_number=entry_number,
                )

            if is_main_document:
                # Only overwrite existing values when the node supplies one.
                parsed_date = self.get_datetime_from_node(
                    doc_node, 'date_filed', cast_to_date=True)
                docket_entry.date_filed = (parsed_date or
                                           docket_entry.date_filed)
                parsed_description = self.get_str_from_node(
                    doc_node, 'long_desc')
                docket_entry.description = (parsed_description or
                                            docket_entry.description)
                if not debug:
                    docket_entry.save()

            recap_doc = self.make_recap_document(
                doc_node,
                docket_entry,
                entry_number,
                attachment_number,
                document_type,
                debug,
            )
Exemple #3
0
    def make_documents(self, docket, debug):
        """Parse through the document nodes, making good objects.

        For every node, create a line item on the Docket (a DocketEntry), and
        create 1..n additional RECAPDocuments (attachments or regular documents)
        that are associated with that DocketEntry.

        Nodes that cannot be processed are logged and skipped: an attachment
        whose DocketEntry does not exist, an entry number that matches more
        than one DocketEntry, or a DocketEntry that fails to save.

        :param docket: The docket whose entries are looked up or created.
        :param debug: When truthy, docket entries are not saved. The flag is
        also passed through to make_recap_document.
        :return: A list holding the pk of every non-None object returned by
        make_recap_document.
        """
        recap_docs = []
        for doc_node in self.document_list:
            # Make a DocketEntry object
            entry_number = doc_node.xpath('@doc_num')[0]
            attachment_number = int(doc_node.xpath('@attachment_num')[0])
            logger.info("Working on document %s, attachment %s",
                        entry_number, attachment_number)

            # Attachment number 0 denotes the main document of the entry.
            if attachment_number == 0:
                document_type = RECAPDocument.PACER_DOCUMENT
            else:
                document_type = RECAPDocument.ATTACHMENT

            try:
                docket_entry = DocketEntry.objects.get(
                    docket=docket,
                    entry_number=entry_number,
                )
            except DocketEntry.DoesNotExist:
                if document_type == RECAPDocument.PACER_DOCUMENT:
                    docket_entry = DocketEntry(
                        docket=docket,
                        entry_number=entry_number,
                    )
                else:
                    logger.error("Tried to create attachment without a "
                                 "DocketEntry object to associate it with.")
                    continue
            except DocketEntry.MultipleObjectsReturned:
                # get() is the call that raises this, so it has to be caught
                # here; otherwise duplicate rows abort the whole import
                # instead of skipping just this node.
                logger.error("Multiple docket entries found for entry "
                             "number '%s' on docket '%s'.",
                             entry_number, docket)
                continue

            if document_type == RECAPDocument.PACER_DOCUMENT:
                # Only overwrite existing values when the node supplies one.
                date_filed = (self.get_datetime_from_node(
                    doc_node, 'date_filed', cast_to_date=True)
                              or docket_entry.date_filed)
                docket_entry.date_filed = date_filed
                docket_entry.description = (self.get_str_from_node(
                    doc_node, 'long_desc') or docket_entry.description)
                if not debug:
                    try:
                        docket_entry.save()
                    except (IntegrityError,
                            DocketEntry.MultipleObjectsReturned):
                        logger.error("Unable to create docket entry for "
                                     "docket #%s, on entry: %s.",
                                     docket, entry_number)
                        continue

            recap_doc = self.make_recap_document(
                doc_node,
                docket_entry,
                entry_number,
                attachment_number,
                document_type,
                debug,
            )
            if recap_doc is not None:
                recap_docs.append(recap_doc)

        return [item.pk for item in recap_docs]
Exemple #4
0
    def make_documents(self, docket, debug):
        """Parse through the document nodes, making good objects.

        For every node, create a line item on the Docket (a DocketEntry), and
        create 1..n additional RECAPDocuments (attachments or regular documents)
        that are associated with that DocketEntry.

        :param docket: The docket whose entries are looked up or created.
        :param debug: When truthy, docket entries are not saved. The flag is
        also passed through to make_recap_document.
        :return: None. An attachment node whose DocketEntry does not exist is
        logged and skipped.
        """
        for doc_node in self.document_list:
            # Make a DocketEntry object
            entry_number = int(doc_node.xpath('@doc_num')[0])
            attachment_number = int(doc_node.xpath('@attachment_num')[0])
            # Parenthesized single-argument form: prints the same text under
            # Python 2 and is also valid syntax under Python 3.
            print("Working on document %s, attachment %s" % (
                entry_number, attachment_number))

            # Attachment number 0 denotes the main document of the entry.
            if attachment_number == 0:
                document_type = RECAPDocument.PACER_DOCUMENT
            else:
                document_type = RECAPDocument.ATTACHMENT

            try:
                docket_entry = DocketEntry.objects.get(
                    docket=docket,
                    entry_number=entry_number,
                )
            except DocketEntry.DoesNotExist:
                if document_type == RECAPDocument.PACER_DOCUMENT:
                    docket_entry = DocketEntry(
                        docket=docket,
                        entry_number=entry_number,
                    )
                else:
                    # An attachment needs an existing entry to hang off of.
                    logger.error(
                        "Tried to create attachment without a DocketEntry "
                        "object to associate it with."
                    )
                    continue

            if document_type == RECAPDocument.PACER_DOCUMENT:
                # Only overwrite existing values when the node supplies one.
                date_filed = (
                    self.get_datetime_from_node(doc_node, 'date_filed',
                                                cast_to_date=True) or
                    docket_entry.date_filed
                )
                docket_entry.date_filed = date_filed
                docket_entry.description = (
                    self.get_str_from_node(doc_node, 'long_desc') or
                    docket_entry.description
                )
                if not debug:
                    docket_entry.save()

            recap_doc = self.make_recap_document(
                doc_node,
                docket_entry,
                entry_number,
                attachment_number,
                document_type,
                debug,
            )