def get_or_make_docket_entry(d, docket_entry):
    """Find the DocketEntry matching a scraped entry, or make a new one.

    Numbered entries are looked up (or created) by docket and entry number.
    Unnumbered entries are matched on docket, filing date and either the long
    description or the short description of an attached RECAP document.

    :param d: The docket the entry is expected to belong to.
    :param docket_entry: The scraped dict from Juriscraper for the docket
        entry. Reads the keys "document_number" and "date_filed", plus the
        optional "description" and "short_description".
    :return: A tuple of (de, de_created), where de is the DocketEntry object
        and de_created is a boolean stating whether it was created. None is
        returned when several numbered entries match.
    """
    number = docket_entry["document_number"]
    if number:
        try:
            entry, was_created = DocketEntry.objects.get_or_create(
                docket=d, entry_number=number
            )
        except DocketEntry.MultipleObjectsReturned:
            logger.error(
                "Multiple docket entries found for document "
                "entry number '%s' while processing '%s'",
                number,
                d,
            )
            return None
        return entry, was_created

    # No entry number to key on: fall back to the filing date combined with
    # whichever descriptions (long and/or short) the scraper supplied.
    normalize_long_description(docket_entry)
    lookup = Q()
    long_description = docket_entry.get("description")
    if long_description:
        lookup |= Q(description=long_description)
    short_description = docket_entry.get("short_description")
    if short_description:
        lookup |= Q(recap_documents__description=short_description)

    candidates = DocketEntry.objects.filter(
        lookup,
        docket=d,
        date_filed=docket_entry["date_filed"],
        entry_number=docket_entry["document_number"],
    )
    match_count = candidates.count()

    if match_count == 0:
        # Nothing matched; build a fresh instance (this function does not
        # save it).
        fresh = DocketEntry(
            docket=d, entry_number=docket_entry["document_number"]
        )
        return fresh, True

    if match_count == 1:
        return candidates[0], False

    logger.warning(
        "Multiple docket entries returned for unnumbered docket "
        "entry on date: %s while processing %s. Attempting merge",
        docket_entry["date_filed"],
        d,
    )
    # Unnumbered entries carry so little metadata that several matches are
    # collapsed into one by the merge helper rather than treated as an error.
    merged = merge_unnumbered_docket_entries(candidates)
    return merged, False
def make_documents(self, docket, debug):
    """Parse through the document nodes, making good objects.

    For every node in ``self.document_list``, find the DocketEntry on
    ``docket`` with the node's document number (building a new one when the
    node is a main document and none exists), then pass the node on to
    ``self.make_recap_document`` to make the associated RECAPDocument.

    An attachment node whose DocketEntry does not exist is logged as an
    error and skipped.

    :param docket: The docket that the docket entries belong to.
    :param debug: When truthy the DocketEntry is not saved. The flag is also
        forwarded to ``make_recap_document``.
    :return: None
    """
    for doc_node in self.document_list:
        # Make a DocketEntry object
        entry_number = int(doc_node.xpath('@doc_num')[0])
        attachment_number = int(doc_node.xpath('@attachment_num')[0])
        # Use the logger rather than the Python 2 print statement so this
        # parses on Python 3 and progress goes where the errors below go.
        logger.info(
            "Working on document %s, attachment %s",
            entry_number,
            attachment_number,
        )

        # Attachment number 0 denotes the main document of the entry.
        if attachment_number == 0:
            document_type = RECAPDocument.PACER_DOCUMENT
        else:
            document_type = RECAPDocument.ATTACHMENT

        try:
            docket_entry = DocketEntry.objects.get(
                docket=docket,
                entry_number=entry_number,
            )
        except DocketEntry.DoesNotExist:
            if document_type == RECAPDocument.PACER_DOCUMENT:
                docket_entry = DocketEntry(
                    docket=docket,
                    entry_number=entry_number,
                )
            else:
                # An attachment needs an existing entry to hang off of.
                logger.error(
                    "Tried to create attachment without a DocketEntry "
                    "object to associate it with."
                )
                continue

        if document_type == RECAPDocument.PACER_DOCUMENT:
            # Only the main document updates the entry's metadata; values
            # already on the entry are kept when the node has none.
            docket_entry.date_filed = (
                self.get_datetime_from_node(
                    doc_node, 'date_filed', cast_to_date=True)
                or docket_entry.date_filed
            )
            docket_entry.description = (
                self.get_str_from_node(doc_node, 'long_desc')
                or docket_entry.description
            )
            if not debug:
                docket_entry.save()

        self.make_recap_document(
            doc_node,
            docket_entry,
            entry_number,
            attachment_number,
            document_type,
            debug,
        )
def make_documents(self, docket, debug):
    """Turn each document node into a DocketEntry plus a RECAPDocument.

    Walks ``self.document_list``. For each node the DocketEntry on ``docket``
    with the node's document number is fetched, or built when the node is a
    main document and no entry exists yet. The node is then handed to
    ``self.make_recap_document``.

    Nodes are skipped (with an error logged) when an attachment has no
    DocketEntry to attach to, or when saving the DocketEntry raises
    IntegrityError or DocketEntry.MultipleObjectsReturned.

    :param docket: The docket that the docket entries belong to.
    :param debug: When truthy the DocketEntry is not saved. The flag is also
        forwarded to ``make_recap_document``.
    :return: A list of the ``pk`` values of the objects returned by
        ``make_recap_document``, leaving out any that came back as None.
    """
    made_docs = []
    for node in self.document_list:
        entry_number = node.xpath('@doc_num')[0]
        attachment_number = int(node.xpath('@attachment_num')[0])
        logger.info("Working on document %s, attachment %s" %
                    (entry_number, attachment_number))

        # Attachment number 0 denotes the main document of the entry.
        document_type = (
            RECAPDocument.PACER_DOCUMENT
            if attachment_number == 0
            else RECAPDocument.ATTACHMENT
        )
        is_main_document = document_type == RECAPDocument.PACER_DOCUMENT

        try:
            entry = DocketEntry.objects.get(
                docket=docket, entry_number=entry_number)
        except DocketEntry.DoesNotExist:
            if not is_main_document:
                logger.error("Tried to create attachment without a "
                             "DocketEntry object to associate it with.")
                continue
            entry = DocketEntry(docket=docket, entry_number=entry_number)

        if is_main_document:
            # Only the main document refreshes the entry's metadata; values
            # already on the entry are kept when the node supplies none.
            parsed_date = self.get_datetime_from_node(
                node, 'date_filed', cast_to_date=True)
            entry.date_filed = parsed_date or entry.date_filed
            parsed_description = self.get_str_from_node(node, 'long_desc')
            entry.description = parsed_description or entry.description

            if not debug:
                try:
                    entry.save()
                except (IntegrityError, DocketEntry.MultipleObjectsReturned):
                    logger.error("Unable to create docket entry for docket "
                                 "#%s, on entry: %s." % (docket, entry_number))
                    continue

        made = self.make_recap_document(
            node,
            entry,
            entry_number,
            attachment_number,
            document_type,
            debug,
        )
        if made is not None:
            made_docs.append(made)

    return [doc.pk for doc in made_docs]
def make_documents(self, docket, debug):
    """Parse through the document nodes, making good objects.

    For every node in ``self.document_list``, create or update a line item
    on the Docket (a DocketEntry) and then call ``self.make_recap_document``
    to make the RECAPDocument (attachment or regular document) associated
    with that DocketEntry.

    Attachment nodes for which no DocketEntry exists are logged as errors
    and skipped; processing continues with the next node.

    :param docket: The docket that the docket entries belong to.
    :param debug: When truthy the DocketEntry is not saved. The flag is also
        forwarded to ``make_recap_document``.
    :return: None
    """
    for doc_node in self.document_list:
        # Make a DocketEntry object
        entry_number = int(doc_node.xpath('@doc_num')[0])
        attachment_number = int(doc_node.xpath('@attachment_num')[0])
        # Progress goes through the logger instead of the Python 2 print
        # statement, which is a syntax error on Python 3.
        logger.info(
            "Working on document %s, attachment %s",
            entry_number,
            attachment_number,
        )

        # Attachment number 0 denotes the main document of the entry.
        if attachment_number == 0:
            document_type = RECAPDocument.PACER_DOCUMENT
        else:
            document_type = RECAPDocument.ATTACHMENT

        try:
            docket_entry = DocketEntry.objects.get(
                docket=docket,
                entry_number=entry_number,
            )
        except DocketEntry.DoesNotExist:
            if document_type == RECAPDocument.PACER_DOCUMENT:
                docket_entry = DocketEntry(
                    docket=docket,
                    entry_number=entry_number,
                )
            else:
                # Without an entry there is nothing to attach this to.
                logger.error(
                    "Tried to create attachment without a DocketEntry "
                    "object to associate it with."
                )
                continue

        if document_type == RECAPDocument.PACER_DOCUMENT:
            # Prefer values parsed from the node, falling back to whatever
            # the entry already holds.
            docket_entry.date_filed = (
                self.get_datetime_from_node(doc_node, 'date_filed',
                                            cast_to_date=True)
                or docket_entry.date_filed
            )
            docket_entry.description = (
                self.get_str_from_node(doc_node, 'long_desc')
                or docket_entry.description
            )
            if not debug:
                docket_entry.save()

        self.make_recap_document(
            doc_node,
            docket_entry,
            entry_number,
            attachment_number,
            document_type,
            debug,
        )