def get_next_date_range(court_id, span=7):
    """Get the next start and end query dates for a court.

    Check the DB for the last date for a court that was completed. Return the
    day after that date + span days into the future as the range to query for
    the requested court.

    If the court is still in progress, return (None, None).

    :param court_id: A PACER Court ID
    :param span: The number of days to go forward from the last completed date
    :return: A (start_date, end_date) tuple, or (None, None) if a scrape for
        the court is still in progress.
    :raises PACERFreeDocumentLog.DoesNotExist: If the court has no
        non-failed log entries at all.
    """
    court_id = map_pacer_to_cl_id(court_id)
    try:
        last_completion_log = PACERFreeDocumentLog.objects.filter(
            court_id=court_id,
        ).exclude(
            status=PACERFreeDocumentLog.SCRAPE_FAILED,
        ).latest('date_queried')
    except PACERFreeDocumentLog.DoesNotExist:
        # Use the module logger rather than print() so the failure shows up
        # in the scraper's logs; re-raise so callers see the hard failure.
        logger.error("FAILED ON: %s", court_id)
        raise
    if last_completion_log.status == PACERFreeDocumentLog.SCRAPE_IN_PROGRESS:
        return None, None
    last_complete_date = last_completion_log.date_queried
    next_start_date = last_complete_date + timedelta(days=1)
    # Clamp the end of the range to today — there is nothing to query in the
    # future yet.
    next_end_date = min(now().date(),
                        last_complete_date + timedelta(days=span))
    return next_start_date, next_end_date
def mark_court_in_progress(court_id, d):
    """Create and return an in-progress scrape log row for a court.

    :param court_id: A PACER Court ID
    :param d: The date being queried
    :return: The newly created PACERFreeDocumentLog instance
    """
    return PACERFreeDocumentLog.objects.create(
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
        date_queried=d,
        court_id=map_pacer_to_cl_id(court_id),
    )
def get_next_date_range(court_id, span=7):
    """Get the next start and end query dates for a court.

    Check the DB for the last date for a court that was completed. Return the
    day after that date + span days into the future as the range to query for
    the requested court.

    If the court is still in progress, return (None, None).

    :param court_id: A PACER Court ID
    :param span: The number of days to go forward from the last completed date
    :return: A (start_date, end_date) tuple, or (None, None) if a scrape for
        the court is still in progress.
    :raises PACERFreeDocumentLog.DoesNotExist: If the court has no
        non-failed log entries at all.
    """
    court_id = map_pacer_to_cl_id(court_id)
    try:
        last_completion_log = PACERFreeDocumentLog.objects.filter(
            court_id=court_id,
        ).exclude(
            status=PACERFreeDocumentLog.SCRAPE_FAILED,
        ).latest('date_queried')
    except PACERFreeDocumentLog.DoesNotExist:
        # logger.warn() is a deprecated alias of warning(); also prefer lazy
        # %-style arguments over eager string interpolation.
        logger.warning("FAILED ON: %s", court_id)
        raise
    if last_completion_log.status == PACERFreeDocumentLog.SCRAPE_IN_PROGRESS:
        return None, None
    last_complete_date = last_completion_log.date_queried
    next_start_date = last_complete_date + timedelta(days=1)
    # Clamp the end of the range to today — there is nothing to query in the
    # future yet.
    next_end_date = min(now().date(),
                        last_complete_date + timedelta(days=span))
    return next_start_date, next_end_date
def get_next_date_range(
    court_id: str,
    span: int = 7,
) -> Tuple[Optional[date], Optional[date]]:
    """Get the next start and end query dates for a court.

    Check the DB for the last date for a court that was completed. Return the
    day after that date + span days into the future as the range to query for
    the requested court.

    If the court is still in progress, return (None, None).

    :param court_id: A PACER Court ID
    :param span: The number of days to go forward from the last completed date
    :return: A (start_date, end_date) tuple, or (None, None) if a scrape for
        the court is still in progress.
    """
    court_id = map_pacer_to_cl_id(court_id)
    try:
        last_completion_log = (
            PACERFreeDocumentLog.objects.filter(court_id=court_id)
            .exclude(status=PACERFreeDocumentLog.SCRAPE_FAILED)
            .latest("date_queried")
        )
    except PACERFreeDocumentLog.DoesNotExist:
        logger.warning(f"FAILED ON: {court_id}")
        raise
    if last_completion_log.status == PACERFreeDocumentLog.SCRAPE_IN_PROGRESS:
        return None, None

    today = now().date()
    # Ensure that we go back five days from the last time we had success if
    # that success was in the last few days — i.e., re-scrape a small
    # overlapping window.
    start_date = min(today - timedelta(days=5),
                     last_completion_log.date_queried)
    # Never extend the range past today.
    end_date = min(today, start_date + timedelta(days=span))
    return start_date, end_date
def mark_court_done_on_date(status, court_id, d):
    """Close out the most recent in-progress scrape log for a court.

    Finds the latest log row for the court that is marked in-progress, stamps
    it with the queried date, the final status, and a completion timestamp.
    If no in-progress row exists, this is a no-op.

    :param status: The final PACERFreeDocumentLog status to record
    :param court_id: A PACER Court ID
    :param d: The date that was queried
    :return: The status that was recorded, or None if nothing was updated
    """
    court_id = map_pacer_to_cl_id(court_id)
    in_progress_logs = PACERFreeDocumentLog.objects.filter(
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
        court_id=court_id,
    )
    try:
        doc_log = in_progress_logs.latest('date_queried')
    except PACERFreeDocumentLog.DoesNotExist:
        # Nothing in progress for this court; quietly do nothing.
        return None
    doc_log.date_queried = d
    doc_log.status = status
    doc_log.date_completed = now()
    doc_log.save()
    return status
def process_free_opinion_result(self, row_pk, cnt):
    """Process a single result from the free opinion report.

    Looks up the scraped PACERFreeDocumentRow, normalizes its case name,
    then creates/updates the corresponding Docket, DocketEntry, and
    RECAPDocument inside one transaction.

    NOTE(review): `self` appears to be a bound Celery task — `self.request`,
    `self.retries`/`self.max_retries`, and `self.retry` come from the Task
    API; setting `self.request.callbacks = None` aborts the rest of the task
    chain. Confirm against the task registration.

    :param row_pk: PK of the PACERFreeDocumentRow to process
    :param cnt: A case-name-tools object providing make_case_name_short()
    :return: A dict with the row, the RECAPDocument PK, and the PACER court
        ID for downstream tasks, or None when processing stopped early.
    """
    result = PACERFreeDocumentRow.objects.get(pk=row_pk)
    result.court = Court.objects.get(pk=map_pacer_to_cl_id(result.court_id))
    result.case_name = harmonize(result.case_name)
    result.case_name_short = cnt.make_case_name_short(result.case_name)
    # Shallow-copy the row and strip the attributes that must not leak into
    # the Docket lookup/save below.
    row_copy = copy.copy(result)
    # If we don't do this, the doc's date_filed becomes the docket's
    # date_filed. Bad.
    delattr(row_copy, 'date_filed')
    # If we don't do this, we get the PACER court id and it crashes
    delattr(row_copy, 'court_id')
    # If we don't do this, the id of result tries to smash that of the docket.
    delattr(row_copy, 'id')
    try:
        with transaction.atomic():
            docket = lookup_and_save(row_copy)
            if not docket:
                # Record the failure on the row and abort the task chain.
                msg = "Unable to create docket for %s" % result
                logger.error(msg)
                result.error_msg = msg
                result.save()
                self.request.callbacks = None
                return
            docket.blocked, docket.date_blocked = get_blocked_status(docket)
            docket.save()
            de, de_created = DocketEntry.objects.update_or_create(
                docket=docket,
                entry_number=result.document_number,
                defaults={
                    'date_filed': result.date_filed,
                    'description': result.description,
                })
            rd, rd_created = RECAPDocument.objects.update_or_create(
                docket_entry=de,
                document_number=result.document_number,
                attachment_number=None,
                defaults={
                    'pacer_doc_id': result.pacer_doc_id,
                    'document_type': RECAPDocument.PACER_DOCUMENT,
                    'is_free_on_pacer': True,
                })
    except IntegrityError as e:
        # Likely a concurrent insert; retry until max_retries, then record
        # the error on the row and give up.
        msg = "Raised IntegrityError: %s" % e
        logger.error(msg)
        if self.request.retries == self.max_retries:
            result.error_msg = msg
            result.save()
            return
        raise self.retry(exc=e)
    except DatabaseError as e:
        # Non-retryable DB failure: record it and abort the task chain.
        msg = "Unable to complete database transaction:\n%s" % e
        logger.error(msg)
        result.error_msg = msg
        result.save()
        self.request.callbacks = None
        return
    if not rd_created and rd.is_available:
        # The item already exists and is available. Fantastic, mark it as free,
        # and call it a day.
        rd.is_free_on_pacer = True
        rd.save()
        result.delete()
        self.request.callbacks = None
        return
    return {
        'result': result,
        'rd_pk': rd.pk,
        'pacer_court_id': result.court_id
    }
def mark_court_in_progress(court_id, d):
    """Create an in-progress scrape log row for a court.

    :param court_id: A PACER Court ID
    :param d: The date being queried
    :return: The newly created PACERFreeDocumentLog instance
    """
    # Return the created log (instead of discarding it) so callers can hold
    # onto it and update/complete it later; consistent with the sibling
    # version of this function in this file. Backward-compatible: existing
    # callers that ignore the return value are unaffected.
    return PACERFreeDocumentLog.objects.create(
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
        date_queried=d,
        court_id=map_pacer_to_cl_id(court_id),
    )
def update_docket_appellate_metadata(d, docket_data):
    """Update the metadata specific to appellate cases.

    Merges scraped appellate fields from ``docket_data`` onto the Docket
    ``d`` and onto its OriginatingCourtInformation, keeping any existing
    stored value when the scrape produced nothing (the ``new or old``
    pattern throughout).

    :param d: A Docket object to update in place (not saved here)
    :param docket_data: A dict of scraped docket data
    :return: A (docket, originating_court_information) tuple; the second
        element is None when the data doesn't look appellate, and is NOT
        saved by this function.
    """
    if not any([
        docket_data.get('originating_court_information'),
        docket_data.get('appeal_from'),
        docket_data.get('panel')
    ]):
        # Probably not appellate.
        return d, None
    d.panel_str = ', '.join(docket_data.get('panel', [])) or d.panel_str
    d.appellate_fee_status = docket_data.get('fee_status', '') or d.appellate_fee_status
    d.appellate_case_type_information = docket_data.get(
        'case_type_information', '') or d.appellate_case_type_information
    d.appeal_from_str = docket_data.get('appeal_from', '') or d.appeal_from_str

    # Do originating court information dict
    og_info = docket_data.get('originating_court_information')
    if not og_info:
        return d, None
    if og_info.get('court_id'):
        cl_id = map_pacer_to_cl_id(og_info['court_id'])
        if Court.objects.filter(pk=cl_id).exists():
            # Ensure the court exists. Sometimes PACER does weird things,
            # like in 14-1743 in CA3, where it says the court_id is 'uspci'.
            # If we don't do this check, the court ID could be invalid, and
            # our whole save of the docket fails.
            d.appeal_from_id = cl_id
    # Reuse the existing originating-court row if there is one; otherwise
    # build a fresh (unsaved) one.
    if d.originating_court_information:
        d_og_info = d.originating_court_information
    else:
        d_og_info = OriginatingCourtInformation()
    # Ensure we don't share A-Numbers, which can sometimes be in the docket
    # number field.
    docket_number = og_info.get('docket_number', '') or d_og_info.docket_number
    docket_number, _ = anonymize(docket_number)
    d_og_info.docket_number = docket_number
    d_og_info.court_reporter = og_info.get('court_reporter', '') or d_og_info.court_reporter
    d_og_info.date_disposed = og_info.get(
        'date_disposed') or d_og_info.date_disposed
    d_og_info.date_filed = og_info.get('date_filed') or d_og_info.date_filed
    d_og_info.date_judgment = og_info.get(
        'date_judgment') or d_og_info.date_judgment
    d_og_info.date_judgment_eod = og_info.get(
        'date_judgment_eod') or d_og_info.date_judgment_eod
    d_og_info.date_filed_noa = og_info.get(
        'date_filed_noa') or d_og_info.date_filed_noa
    d_og_info.date_received_coa = og_info.get(
        'date_received_coa') or d_og_info.date_received_coa
    d_og_info.assigned_to_str = og_info.get(
        'assigned_to') or d_og_info.assigned_to_str
    d_og_info.ordering_judge_str = og_info.get(
        'ordering_judge') or d_og_info.ordering_judge_str
    if not all([d.appeal_from_id, d_og_info.date_filed]):
        # Can't do judge lookups. Call it quits.
        return d, d_og_info
    # Only link a Person when the name resolves to exactly one candidate
    # judge; ambiguous names are left as strings only.
    if og_info.get('assigned_to'):
        judges = get_candidate_judges(og_info['assigned_to'], d.appeal_from_id,
                                      d_og_info.date_filed)
        if judges is not None and len(judges) == 1:
            d_og_info.assigned_to = judges[0]
    if og_info.get('ordering_judge'):
        judges = get_candidate_judges(og_info['ordering_judge'],
                                      d.appeal_from_id, d_og_info.date_filed)
        if judges is not None and len(judges) == 1:
            d_og_info.ordering_judge = judges[0]
    return d, d_og_info
def update_docket_appellate_metadata(d, docket_data):
    """Update the metadata specific to appellate cases.

    Merges scraped appellate fields from ``docket_data`` onto the Docket
    ``d`` and onto its OriginatingCourtInformation. For every field, a
    freshly scraped value wins; when the scrape produced nothing, the
    previously stored value is kept.

    :param d: A Docket object to update in place (not saved here)
    :param docket_data: A dict of scraped docket data
    :return: A (docket, originating_court_information) tuple; the second
        element is None when the data doesn't look appellate, and is NOT
        saved by this function.
    """
    appellate_markers = (
        docket_data.get("originating_court_information"),
        docket_data.get("appeal_from"),
        docket_data.get("panel"),
    )
    if not any(appellate_markers):
        # Probably not appellate.
        return d, None

    d.panel_str = ", ".join(docket_data.get("panel", [])) or d.panel_str
    d.appellate_fee_status = (
        docket_data.get("fee_status", "") or d.appellate_fee_status
    )
    d.appellate_case_type_information = (
        docket_data.get("case_type_information", "")
        or d.appellate_case_type_information
    )
    d.appeal_from_str = docket_data.get("appeal_from", "") or d.appeal_from_str

    # Do originating court information dict
    og_info = docket_data.get("originating_court_information")
    if not og_info:
        return d, None

    if og_info.get("court_id"):
        cl_id = map_pacer_to_cl_id(og_info["court_id"])
        if Court.objects.filter(pk=cl_id).exists():
            # Ensure the court exists. Sometimes PACER does weird things,
            # like in 14-1743 in CA3, where it says the court_id is 'uspci'.
            # If we don't do this check, the court ID could be invalid, and
            # our whole save of the docket fails.
            d.appeal_from_id = cl_id

    # Reuse the existing originating-court row if present; else a fresh
    # (unsaved) one.
    d_og_info = d.originating_court_information or OriginatingCourtInformation()

    # Ensure we don't share A-Numbers, which can sometimes be in the docket
    # number field.
    docket_number = og_info.get("docket_number", "") or d_og_info.docket_number
    docket_number, _ = anonymize(docket_number)
    d_og_info.docket_number = docket_number

    # Data-driven copy of the remaining scalar fields: (model attr, dict key).
    field_map = (
        ("court_reporter", "court_reporter"),
        ("date_disposed", "date_disposed"),
        ("date_filed", "date_filed"),
        ("date_judgment", "date_judgment"),
        ("date_judgment_eod", "date_judgment_eod"),
        ("date_filed_noa", "date_filed_noa"),
        ("date_received_coa", "date_received_coa"),
        ("assigned_to_str", "assigned_to"),
        ("ordering_judge_str", "ordering_judge"),
    )
    for attr, key in field_map:
        setattr(d_og_info, attr, og_info.get(key) or getattr(d_og_info, attr))

    if not all([d.appeal_from_id, d_og_info.date_filed]):
        # Can't do judge lookups. Call it quits.
        return d, d_og_info

    # Resolve judge names to Person objects where possible.
    for judge_attr in ("assigned_to", "ordering_judge"):
        lookup_judge_by_full_name_and_set_attr(
            d_og_info,
            judge_attr,
            og_info.get(judge_attr),
            d.appeal_from_id,
            d_og_info.date_filed,
        )
    return d, d_og_info
def process_free_opinion_result(self, row_pk, cnt):
    """Process a single result from the free opinion report.

    Looks up the scraped PACERFreeDocumentRow, normalizes its case name,
    then creates/updates the corresponding Docket, DocketEntry, and
    RECAPDocument inside one transaction.

    NOTE(review): `self` appears to be a bound Celery task — `self.request`,
    `self.retries`/`self.max_retries`, and `self.retry` come from the Task
    API; setting `self.request.callbacks = None` aborts the rest of the task
    chain. Confirm against the task registration.

    :param row_pk: PK of the PACERFreeDocumentRow to process
    :param cnt: A case-name-tools object providing make_case_name_short()
    :return: A dict with the row, the RECAPDocument PK, and the PACER court
        ID for downstream tasks, or None when processing stopped early.
    """
    result = PACERFreeDocumentRow.objects.get(pk=row_pk)
    result.court = Court.objects.get(pk=map_pacer_to_cl_id(result.court_id))
    result.case_name = harmonize(result.case_name)
    result.case_name_short = cnt.make_case_name_short(result.case_name)
    # Shallow-copy the row and strip the attributes that must not leak into
    # the Docket lookup/save below.
    row_copy = copy.copy(result)
    # If we don't do this, the doc's date_filed becomes the docket's
    # date_filed. Bad.
    delattr(row_copy, 'date_filed')
    # If we don't do this, we get the PACER court id and it crashes
    delattr(row_copy, 'court_id')
    # If we don't do this, the id of result tries to smash that of the docket.
    delattr(row_copy, 'id')
    try:
        with transaction.atomic():
            docket = lookup_and_save(row_copy)
            if not docket:
                # Record the failure on the row and abort the task chain.
                msg = "Unable to create docket for %s" % result
                logger.error(msg)
                result.error_msg = msg
                result.save()
                self.request.callbacks = None
                return
            docket.blocked, docket.date_blocked = get_blocked_status(docket)
            docket.save()
            de, de_created = DocketEntry.objects.update_or_create(
                docket=docket,
                entry_number=result.document_number,
                defaults={
                    'date_filed': result.date_filed,
                    'description': result.description,
                }
            )
            rd, rd_created = RECAPDocument.objects.update_or_create(
                docket_entry=de,
                document_number=result.document_number,
                attachment_number=None,
                defaults={
                    'pacer_doc_id': result.pacer_doc_id,
                    'document_type': RECAPDocument.PACER_DOCUMENT,
                    'is_free_on_pacer': True,
                }
            )
    except IntegrityError as e:
        # Likely a concurrent insert; retry until max_retries, then record
        # the error on the row and give up.
        msg = "Raised IntegrityError: %s" % e
        logger.error(msg)
        if self.request.retries == self.max_retries:
            result.error_msg = msg
            result.save()
            return
        raise self.retry(exc=e)
    except DatabaseError as e:
        # Non-retryable DB failure: record it and abort the task chain.
        msg = "Unable to complete database transaction:\n%s" % e
        logger.error(msg)
        result.error_msg = msg
        result.save()
        self.request.callbacks = None
        return
    if not rd_created and rd.is_available:
        # The item already exists and is available. Fantastic, mark it as free,
        # and call it a day.
        rd.is_free_on_pacer = True
        rd.save()
        result.delete()
        self.request.callbacks = None
        return
    return {'result': result,
            'rd_pk': rd.pk,
            'pacer_court_id': result.court_id}