def test_only_the_docket_already_exists(self):
    """A PDF upload whose docket entry is missing should fail cleanly.

    In theory this situation never arises, but if it does, the task must
    raise and the queue item must be queued for retry.
    """
    self.de.delete()
    with self.assertRaises(DocketEntry.DoesNotExist):
        process_recap_pdf(self.pq.pk)

    pq = self.pq
    pq.refresh_from_db()
    # Celery retries aren't exercised here, unfortunately. Once they are,
    # the correct final status would be pq.PROCESSING_FAILED.
    self.assertEqual(pq.status, pq.QUEUED_FOR_RETRY)
    self.assertIn('Unable to find docket entry', pq.error_message)
def test_nothing_already_exists(self):
    """A PDF upload with neither a docket nor a recap document on file.

    In practice this shouldn't happen; when it does, the task must raise
    and the queue item must be queued for retry.
    """
    self.docket.delete()
    with self.assertRaises(Docket.DoesNotExist):
        process_recap_pdf(self.pq.pk)

    pq = self.pq
    pq.refresh_from_db()
    # Celery retries aren't exercised here, unfortunately. Once they are,
    # the correct final status would be pq.PROCESSING_FAILED.
    self.assertEqual(pq.status, pq.QUEUED_FOR_RETRY)
    self.assertIn('Unable to find docket', pq.error_message)
def test_recap_document_already_exists(self, mock):
    """Re-uploading a PDF we already have should succeed without changes."""
    # Make self.rd look like a fully processed document already.
    self.rd.is_available = True
    self.rd.filepath_local.save(
        self.filename, ContentFile(self.file_content))

    rd = process_recap_pdf(self.pq.pk)

    # No new objects should have been created.
    self.assertEqual(rd, self.rd)
    self.assertEqual(rd.docket_entry, self.de)
    self.assertEqual(rd.docket_entry.docket, self.docket)

    # The queue item should be marked successful and fully linked up.
    pq = self.pq
    pq.refresh_from_db()
    self.assertEqual(pq.status, pq.PROCESSING_SUCCESSFUL)
    self.assertEqual(pq.error_message, 'Successful upload! Nice work.')
    self.assertFalse(pq.filepath_local)
    self.assertEqual(pq.docket_id, self.docket.pk)
    self.assertEqual(pq.docket_entry_id, self.de.pk)
    self.assertEqual(pq.recap_document_id, self.rd.pk)

    # Document extraction must have been skipped.
    mock.assert_not_called()
def test_debug_does_not_create_rd(self, mock):
    """With debug=True, processing must not create RECAPDocuments."""
    docket = Docket.objects.create(
        source=0, court_id='scotus', pacer_case_id='asdf')
    DocketEntry.objects.create(docket=docket, entry_number=1)
    queue_item = ProcessingQueue.objects.create(
        court_id='scotus',
        uploader=self.user,
        pacer_case_id='asdf',
        pacer_doc_id='asdf',
        document_number='1',
        filepath_local=self.pdf,
        upload_type=UPLOAD_TYPE.PDF,
        debug=True,
    )

    process_recap_pdf(queue_item.pk)

    self.assertEqual(RECAPDocument.objects.count(), 0)
    # Extraction should also be skipped in debug mode.
    mock.assert_not_called()
def process_orphan_documents(
    rds_created: List[RECAPDocument],
    court_id: int,
    docket_date: date,
) -> None:
    """After we finish processing a docket upload add any PDFs we already had
    for that docket that were lingering in our processing queue. This
    addresses the issue that arises when somebody (somehow) uploads a PDF
    without first uploading a docket.

    :param rds_created: The RECAPDocuments created while processing the
        docket; their pacer_doc_ids identify matching orphaned uploads.
    :param docket_date: The docket's date, if known, used to bound how far
        back we look for orphaned uploads.
    :param court_id: The court the docket belongs to.
    :return: None
    """
    # Deferred import (presumably to avoid a circular dependency with
    # cl.recap.tasks); hoisted out of the loop since it is loop-invariant.
    from cl.recap.tasks import process_recap_pdf

    pacer_doc_ids = [rd.pacer_doc_id for rd in rds_created]
    if docket_date:
        # If we get a date from the docket, set the cutoff to 30 days prior
        # for good measure.
        cutoff_date = docket_date - timedelta(days=30)
    else:
        # No date from docket. Limit ourselves to the last 180 days. This
        # will help prevent items with weird errors from plaguing us forever.
        cutoff_date = now() - timedelta(days=180)
    pqs = ProcessingQueue.objects.filter(
        pacer_doc_id__in=pacer_doc_ids,
        court_id=court_id,
        status=PROCESSING_STATUS.FAILED,
        upload_type=UPLOAD_TYPE.PDF,
        debug=False,
        date_modified__gt=cutoff_date,
    ).values_list("pk", flat=True)
    for pq in pqs:
        try:
            process_recap_pdf(pq)
        except Exception:
            # Best-effort reprocessing: these queue items already failed
            # once, so re-raising would just surface the same errors a
            # second time. Note: a bare ``except:`` here would also trap
            # SystemExit/KeyboardInterrupt, so catch Exception instead.
            pass
def test_docket_and_docket_entry_already_exist(self, mock):
    """Docket and entry exist but the PDF does not: the happy path.

    A fresh RECAPDocument should be created and extraction triggered.
    """
    self.rd.delete()

    rd = process_recap_pdf(self.pq.pk)
    self.assertTrue(rd.is_available)
    self.assertTrue(rd.sha1)
    self.assertTrue(rd.filepath_local)
    mock.assert_called_once()
    self.assertIn('gov.uscourts.scotus.asdf.1.0', rd.filepath_local.name)

    pq = self.pq
    pq.refresh_from_db()
    self.assertEqual(pq.status, pq.PROCESSING_SUCCESSFUL)
    self.assertFalse(pq.error_message)
    self.assertFalse(pq.filepath_local)
def test_recap_document_already_exists(self, mock):
    """Re-uploading a PDF we already have should succeed without changes."""
    # Make self.rd look like a fully processed document already.
    self.rd.is_available = True
    self.rd.filepath_local.save(
        self.filename, ContentFile(self.file_content))

    rd = process_recap_pdf(self.pq.pk)

    # No new objects should have been created.
    self.assertEqual(rd, self.rd)
    self.assertEqual(rd.docket_entry, self.de)
    self.assertEqual(rd.docket_entry.docket, self.docket)

    # The queue item should be marked successful with nothing left over.
    pq = self.pq
    pq.refresh_from_db()
    self.assertEqual(pq.status, pq.PROCESSING_SUCCESSFUL)
    self.assertFalse(pq.error_message)
    self.assertFalse(pq.filepath_local)

    # Document extraction must have been skipped.
    mock.assert_not_called()