def parse_items(self):
    """For every item in the directory, send it to Celery for processing"""
    docket_paths, file_count = get_docket_list()
    completed = 0
    for docket_path in docket_paths:
        if completed < self.options['start_item'] - 1:
            # Skip ahead if start_item is provided.
            completed += 1
            continue
        else:
            logger.info("Parsing docket: %s" % docket_path)
            pacer_doc = PacerXMLParser(docket_path)
            docket = pacer_doc.save(self.debug)
            if docket is not None:
                pacer_doc.make_documents(docket, self.debug)
            completed += 1
            max_items = self.options['max_items']
            if completed >= max_items and max_items != -1:
                print("\n\nCompleted %s items. Aborting early." % max_items)
                break
def parse_recap_docket(self, filename, debug=False):
    """Parse a docket path, creating items or updating existing ones."""
    docket_path = os.path.join(settings.MEDIA_ROOT, 'recap', filename)
    recap_pks = []
    try:
        pacer_doc = PacerXMLParser(docket_path)
    except IOError:
        logger.warning("Unable to find the docket at: %s" % docket_path)
    else:
        required_fields = ['case_name', 'date_filed']
        for field in required_fields:
            if not getattr(pacer_doc, field):
                logger.error("Missing required field: %s" % field)
                return recap_pks
        docket = lookup_and_save(pacer_doc, debug=debug)
        if docket is not None:
            try:
                recap_pks = pacer_doc.make_documents(docket, debug=debug)
            except (IntegrityError, DocketEntry.MultipleObjectsReturned) as exc:
                raise self.retry(exc=exc, countdown=20 * 60)
            else:
                pacer_doc.make_parties(docket, debug=debug)
    return recap_pks
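Both parse_recap_docket variants call self.retry, which implies they are registered as bound Celery tasks. A minimal sketch of how such a task is typically declared, assuming standard Celery wiring (the app name and max_retries value are assumptions, not taken from these snippets):

from celery import Celery

# Hypothetical Celery app; the project's actual app module may differ.
app = Celery('tasks')

@app.task(bind=True, max_retries=5)
def parse_recap_docket(self, filename, debug=False):
    ...  # body as above

With bind=True, Celery passes the task instance as the first argument, which is what makes self.retry available; raising the result of self.retry(exc=exc, countdown=20 * 60) aborts the current run and schedules another attempt 20 minutes later.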
def parse_items(self):
    """For every item in the directory, send it to Celery for processing"""
    docket_paths = get_docket_list()
    completed = 0
    for docket_path in docket_paths:
        if completed < self.options['start_item'] - 1:
            # Skip ahead if start_item is provided.
            completed += 1
            continue
        else:
            logger.info("%s: Parsing docket: %s" % (completed, docket_path))
            pacer_doc = PacerXMLParser(docket_path)
            required_fields = ['case_name', 'date_filed']
            # Skip dockets that lack required fields. A bare `continue`
            # inside the field loop would only advance that inner loop, so
            # collect the failures first and skip the docket afterwards.
            missing_fields = [field for field in required_fields
                              if not getattr(pacer_doc, field)]
            if missing_fields:
                for field in missing_fields:
                    logger.error("Missing required field: %s" % field)
                continue
            docket = lookup_and_save(pacer_doc, self.debug)
            if docket is not None:
                pacer_doc.make_documents(docket, self.debug)
                pacer_doc.make_parties(docket, self.debug)
            completed += 1
            max_items = self.options['max_items']
            # The original chained comparison `completed >= max_items != -1`
            # is spelled out here for clarity; behavior is identical.
            if completed >= max_items and max_items != -1:
                print("\n\nCompleted %s items. Aborting early." % max_items)
                break
def parse_recap_docket(self, filename, debug=False):
    """Parse a docket path, creating items or updating existing ones."""
    docket_path = os.path.join(settings.MEDIA_ROOT, 'recap', filename)
    recap_pks = []
    try:
        pacer_doc = PacerXMLParser(docket_path)
    except IOError:
        logger.warning("Unable to find the docket at: %s" % docket_path)
    else:
        docket = pacer_doc.save(debug=debug)
        if docket is not None:
            try:
                recap_pks = pacer_doc.make_documents(docket, debug=debug)
            except (IntegrityError, DocketEntry.MultipleObjectsReturned) as exc:
                raise self.retry(exc=exc, countdown=20 * 60)
    return recap_pks
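Assuming the task registration sketched above, callers would typically enqueue the parse for a worker rather than run it inline. A hedged usage sketch; the filename is taken from the test fixture below and is illustrative only:

# Enqueue the task for a Celery worker (hypothetical invocation).
parse_recap_docket.delay('gov.uscourts.akd.41664.docket.xml')

# Or with explicit arguments and options:
parse_recap_docket.apply_async(
    args=['gov.uscourts.akd.41664.docket.xml'],
    kwargs={'debug': True},
)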
class PacerDocketParserTest(TestCase):
    """Can we parse RECAP dockets successfully?"""
    NUM_PARTIES = 3
    NUM_PETRO_ATTYS = 6
    NUM_FLOYD_ROLES = 3
    DOCKET_PATH = os.path.join(settings.MEDIA_ROOT, 'test', 'xml',
                               'gov.uscourts.akd.41664.docket.xml')

    def setUp(self):
        self.pacer_doc = PacerXMLParser(self.DOCKET_PATH)
        self.docket = lookup_and_save(self.pacer_doc, debug=False)

    def tearDown(self):
        Docket.objects.all().delete()
        Party.objects.all().delete()
        Attorney.objects.all().delete()
        AttorneyOrganization.objects.all().delete()

    def test_party_parsing(self):
        """Can we parse an XML docket and get good results in the DB"""
        self.pacer_doc.make_parties(self.docket, debug=False)
        self.assertEqual(self.docket.parties.all().count(), self.NUM_PARTIES)

        petro = self.docket.parties.get(name__contains="Petro")
        self.assertEqual(petro.party_types.all()[0].name, "Plaintiff")

        attorneys = petro.attorneys.all().distinct()
        self.assertEqual(attorneys.count(), self.NUM_PETRO_ATTYS)

        floyd = petro.attorneys.distinct().get(name__contains='Floyd')
        self.assertEqual(floyd.roles.all().count(), self.NUM_FLOYD_ROLES)
        self.assertEqual(floyd.name, u'Floyd G. Short')
        self.assertEqual(floyd.email, u'*****@*****.**')
        self.assertEqual(floyd.fax, u'(206) 516-3883')
        self.assertEqual(floyd.phone, u'(206) 373-7381')

        godfrey_llp = floyd.organizations.all()[0]
        self.assertEqual(godfrey_llp.name, u'Susman Godfrey, LLP')
        self.assertEqual(godfrey_llp.address1, u'1201 Third Ave.')
        self.assertEqual(godfrey_llp.address2, u'Suite 3800')
        self.assertEqual(godfrey_llp.city, u'Seattle')
        self.assertEqual(godfrey_llp.state, u'WA')
class PacerDocketParserTest(TestCase):
    """Can we parse RECAP dockets successfully?"""
    NUM_PARTIES = 3
    NUM_PETRO_ATTYS = 6
    NUM_FLOYD_ROLES = 3
    DOCKET_PATH = os.path.join(settings.MEDIA_ROOT, 'test', 'xml',
                               'gov.uscourts.akd.41664.docket.xml')

    def setUp(self):
        self.pacer_doc = PacerXMLParser(self.DOCKET_PATH)
        self.docket = lookup_and_save(self.pacer_doc, debug=False)

    def tearDown(self):
        Docket.objects.all().delete()
        Party.objects.all().delete()
        Attorney.objects.all().delete()
        AttorneyOrganization.objects.all().delete()

    def test_party_parsing(self):
        """Can we parse an XML docket and get good results in the DB"""
        self.pacer_doc.make_parties(self.docket, debug=False)
        self.assertEqual(self.docket.parties.all().count(), self.NUM_PARTIES)

        petro = self.docket.parties.get(name__contains="Petro")
        self.assertEqual(petro.party_types.all()[0].name, "Plaintiff")

        attorneys = petro.attorneys.all().distinct()
        self.assertEqual(attorneys.count(), self.NUM_PETRO_ATTYS)

        floyd = petro.attorneys.distinct().get(name__contains='Floyd')
        self.assertEqual(floyd.roles.all().count(), self.NUM_FLOYD_ROLES)
        self.assertEqual(floyd.name, u'Floyd G. Short')
        self.assertEqual(floyd.email, u'*****@*****.**')
        self.assertEqual(floyd.fax, u'206-516-3883')
        self.assertEqual(floyd.phone, u'206-373-7381')

        godfrey_llp = floyd.organizations.all()[0]
        self.assertEqual(godfrey_llp.name, u'Susman Godfrey, LLP')
        self.assertEqual(godfrey_llp.address1, u'1201 Third Ave.')
        self.assertEqual(godfrey_llp.address2, u'Suite 3800')
        self.assertEqual(godfrey_llp.city, u'Seattle')
        self.assertEqual(godfrey_llp.state, u'WA')
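The test snippets above reference models and helpers from the surrounding project. A hedged guess at the import header they assume; the module paths are placeholders, not taken from the source:

import os

from django.conf import settings
from django.test import TestCase

# Hypothetical module paths; the project's actual layout may differ.
from myproject.models import Attorney, AttorneyOrganization, Docket, Party
from myproject.pacer import PacerXMLParser, lookup_and_save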
def parse_items(self):
    """For every item in the directory, send it to Celery for processing"""
    docket_paths = get_docket_list()
    completed = 0
    for docket_path in docket_paths:
        if completed < self.options['start_item'] - 1:
            # Skip ahead if start_item is provided.
            completed += 1
            continue
        else:
            logger.info("%s: Parsing docket: %s" % (completed, docket_path))
            pacer_doc = PacerXMLParser(docket_path)
            required_fields = ['case_name', 'date_filed']
            # As in the variant above, collect missing fields first: a bare
            # `continue` inside the field loop would only advance that inner
            # loop rather than skip the docket.
            missing_fields = [field for field in required_fields
                              if not getattr(pacer_doc, field)]
            if missing_fields:
                for field in missing_fields:
                    logger.error("Missing required field: %s" % field)
                continue
            docket = lookup_and_save(pacer_doc, self.debug)
            if docket is not None:
                pacer_doc.make_documents(docket, self.debug)
                pacer_doc.make_parties(docket, self.debug)
            completed += 1
            max_items = self.options['max_items']
            if completed >= max_items and max_items != -1:
                logger.info("\n\nCompleted %s items. Aborting early." % max_items)
                break
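parse_items reads self.options['start_item'] and self.options['max_items'], which suggests it lives on a Django management command. A minimal sketch of how those options might be wired up; the command name, defaults, and help text are assumptions:

from django.core.management.base import BaseCommand

class Command(BaseCommand):
    # Hypothetical command class; parse_items would be defined on it as above.
    help = 'Parse RECAP dockets from disk.'

    def add_arguments(self, parser):
        # argparse maps --start-item to options['start_item'], etc.
        parser.add_argument('--start-item', type=int, default=1,
                            help='Skip items before this one (1-indexed).')
        parser.add_argument('--max-items', type=int, default=-1,
                            help='Stop after this many items; -1 means no limit.')
        parser.add_argument('--debug', action='store_true', default=False,
                            help='Run without making changes.')

    def handle(self, *args, **options):
        self.options = options
        self.debug = options['debug']
        self.parse_items()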