Beispiel #1
0
def parse_recap_docket(self, filename, debug=False):
    """Parse the RECAP docket stored under MEDIA_ROOT/recap/<filename>.

    :param filename: Name of the docket file inside the ``recap`` directory.
    :param debug: Passed through to ``lookup_and_save``, ``make_documents``
        and ``make_parties``.
    :return: Whatever ``make_documents`` returns, or an empty list when the
        file cannot be opened, a required field is missing, or no docket is
        returned by ``lookup_and_save``.
    :raises: The result of ``self.retry`` (20 minute countdown) when
        ``make_documents`` hits an IntegrityError or
        DocketEntry.MultipleObjectsReturned.
    """
    docket_path = os.path.join(settings.MEDIA_ROOT, 'recap', filename)

    try:
        pacer_doc = PacerXMLParser(docket_path)
    except IOError:
        logger.warning("Unable to find the docket at: %s" % docket_path)
        return []

    # Bail out on the first required attribute that is empty or falsy.
    for field in ('case_name', 'date_filed'):
        if not getattr(pacer_doc, field):
            logger.error("Missing required field: %s" % field)
            return []

    docket = lookup_and_save(pacer_doc, debug=debug)
    if docket is None:
        return []

    try:
        recap_pks = pacer_doc.make_documents(docket, debug=debug)
    except (IntegrityError, DocketEntry.MultipleObjectsReturned) as exc:
        raise self.retry(exc=exc, countdown=20 * 60)

    # Only reached when the documents were created without error.
    pacer_doc.make_parties(docket, debug=debug)
    return recap_pks
    def parse_items(self):
        """Parse every docket path returned by ``get_docket_list()``.

        Each docket is parsed inline with ``PacerXMLParser``. When both
        required fields are present it is saved via ``lookup_and_save`` and,
        if a docket comes back, its documents and parties are created.

        Reads two options from ``self.options``:

        - ``start_item``: 1-based position of the first docket to process;
          earlier ones are skipped.
        - ``max_items``: stop once the position counter reaches this value;
          ``-1`` disables the limit.

        Dockets that lack a required field are logged and skipped, but they
        still advance the position counter so that ``start_item`` keeps
        referring to the same position in the list on a later run.
        """
        docket_paths = get_docket_list()
        required_fields = ['case_name', 'date_filed']

        completed = 0
        for docket_path in docket_paths:
            if completed < self.options['start_item'] - 1:
                # Skip ahead if start_item is provided.
                completed += 1
                continue

            logger.info("%s: Parsing docket: %s" %
                        (completed, docket_path))

            pacer_doc = PacerXMLParser(docket_path)
            missing_fields = [field for field in required_fields
                              if not getattr(pacer_doc, field)]
            for field in missing_fields:
                logger.error("Missing required field: %s" % field)

            # A ``continue`` inside the field loop would only advance that
            # inner loop, so the decision to skip is made out here instead.
            if not missing_fields:
                docket = lookup_and_save(pacer_doc, self.debug)
                if docket is not None:
                    pacer_doc.make_documents(docket, self.debug)
                    pacer_doc.make_parties(docket, self.debug)

            completed += 1

            max_items = self.options['max_items']
            if max_items != -1 and completed >= max_items:
                logger.info("\n\nCompleted %s items. Aborting early." %
                            max_items)
                break
Beispiel #3
0
    def parse_items(self):
        """Parse every docket path returned by ``get_docket_list()``.

        Each docket is parsed inline with ``PacerXMLParser``. When both
        required fields are present it is saved via ``lookup_and_save`` and,
        if a docket comes back, its documents and parties are created.

        Reads two options from ``self.options``:

        - ``start_item``: 1-based position of the first docket to process;
          earlier ones are skipped.
        - ``max_items``: stop once the position counter reaches this value;
          ``-1`` disables the limit.

        Dockets that lack a required field are logged and skipped, but they
        still advance the position counter so that ``start_item`` keeps
        referring to the same position in the list on a later run.
        """
        docket_paths = get_docket_list()
        required_fields = ['case_name', 'date_filed']

        completed = 0
        for docket_path in docket_paths:
            if completed < self.options['start_item'] - 1:
                # Skip ahead if start_item is provided.
                completed += 1
                continue

            logger.info("%s: Parsing docket: %s" % (completed, docket_path))

            pacer_doc = PacerXMLParser(docket_path)
            missing_fields = [field for field in required_fields
                              if not getattr(pacer_doc, field)]
            for field in missing_fields:
                logger.error("Missing required field: %s" % field)

            # A ``continue`` inside the field loop would only advance that
            # inner loop, so the decision to skip is made out here instead.
            if not missing_fields:
                docket = lookup_and_save(pacer_doc, self.debug)
                if docket is not None:
                    pacer_doc.make_documents(docket, self.debug)
                    pacer_doc.make_parties(docket, self.debug)

            completed += 1

            max_items = self.options['max_items']
            if max_items != -1 and completed >= max_items:
                print("\n\nCompleted %s items. Aborting early." % max_items)
                break
Beispiel #4
0
class PacerDocketParserTest(TestCase):
    """Check party parsing against a RECAP docket XML file on disk."""
    NUM_PARTIES = 3
    NUM_PETRO_ATTYS = 6
    NUM_FLOYD_ROLES = 3
    DOCKET_PATH = os.path.join(
        settings.MEDIA_ROOT,
        'test',
        'xml',
        'gov.uscourts.akd.41664.docket.xml'
    )

    def setUp(self):
        # Parse the XML file and save its docket before each test.
        self.pacer_doc = PacerXMLParser(self.DOCKET_PATH)
        self.docket = lookup_and_save(self.pacer_doc, debug=False)

    def tearDown(self):
        # Delete every row of each model, in this order.
        for model in (Docket, Party, Attorney, AttorneyOrganization):
            model.objects.all().delete()

    def test_party_parsing(self):
        """make_parties stores the expected parties, attorneys and firm."""
        self.pacer_doc.make_parties(self.docket, debug=False)

        party_count = self.docket.parties.all().count()
        self.assertEqual(party_count, self.NUM_PARTIES)

        plaintiff = self.docket.parties.get(name__contains="Petro")
        first_type = plaintiff.party_types.all()[0]
        self.assertEqual(first_type.name, "Plaintiff")

        attorney_count = plaintiff.attorneys.all().distinct().count()
        self.assertEqual(attorney_count, self.NUM_PETRO_ATTYS)

        lawyer = plaintiff.attorneys.distinct().get(name__contains='Floyd')
        self.assertEqual(lawyer.roles.all().count(), self.NUM_FLOYD_ROLES)
        expected_lawyer = (
            ('name', u'Floyd G. Short'),
            ('email', u'*****@*****.**'),
            ('fax', u'(206) 516-3883'),
            ('phone', u'(206) 373-7381'),
        )
        for attr, expected in expected_lawyer:
            self.assertEqual(getattr(lawyer, attr), expected)

        firm = lawyer.organizations.all()[0]
        expected_firm = (
            ('name', u'Susman Godfrey, LLP'),
            ('address1', u'1201 Third Ave.'),
            ('address2', u'Suite 3800'),
            ('city', u'Seattle'),
            ('state', u'WA'),
        )
        for attr, expected in expected_firm:
            self.assertEqual(getattr(firm, attr), expected)
Beispiel #5
0
class PacerDocketParserTest(TestCase):
    """Check party parsing against a RECAP docket XML file on disk."""
    NUM_PARTIES = 3
    NUM_PETRO_ATTYS = 6
    NUM_FLOYD_ROLES = 3
    DOCKET_PATH = os.path.join(
        settings.MEDIA_ROOT,
        'test',
        'xml',
        'gov.uscourts.akd.41664.docket.xml'
    )

    def setUp(self):
        # Parse the XML file and save its docket before each test.
        self.pacer_doc = PacerXMLParser(self.DOCKET_PATH)
        self.docket = lookup_and_save(self.pacer_doc, debug=False)

    def tearDown(self):
        # Delete every row of each model, in this order.
        for model in (Docket, Party, Attorney, AttorneyOrganization):
            model.objects.all().delete()

    def test_party_parsing(self):
        """make_parties stores the expected parties, attorneys and firm."""
        self.pacer_doc.make_parties(self.docket, debug=False)

        party_count = self.docket.parties.all().count()
        self.assertEqual(party_count, self.NUM_PARTIES)

        plaintiff = self.docket.parties.get(name__contains="Petro")
        first_type = plaintiff.party_types.all()[0]
        self.assertEqual(first_type.name, "Plaintiff")

        attorney_count = plaintiff.attorneys.all().distinct().count()
        self.assertEqual(attorney_count, self.NUM_PETRO_ATTYS)

        lawyer = plaintiff.attorneys.distinct().get(name__contains='Floyd')
        self.assertEqual(lawyer.roles.all().count(), self.NUM_FLOYD_ROLES)
        expected_lawyer = (
            ('name', u'Floyd G. Short'),
            ('email', u'*****@*****.**'),
            ('fax', u'206-516-3883'),
            ('phone', u'206-373-7381'),
        )
        for attr, expected in expected_lawyer:
            self.assertEqual(getattr(lawyer, attr), expected)

        firm = lawyer.organizations.all()[0]
        expected_firm = (
            ('name', u'Susman Godfrey, LLP'),
            ('address1', u'1201 Third Ave.'),
            ('address2', u'Suite 3800'),
            ('city', u'Seattle'),
            ('state', u'WA'),
        )
        for attr, expected in expected_firm:
            self.assertEqual(getattr(firm, attr), expected)
Beispiel #6
0
def parse_recap_docket(self, filename, debug=False):
    """Parse the RECAP docket stored under MEDIA_ROOT/recap/<filename>.

    :param filename: Name of the docket file inside the ``recap`` directory.
    :param debug: Passed through to ``lookup_and_save``, ``make_documents``
        and ``make_parties``.
    :return: Whatever ``make_documents`` returns, or an empty list when the
        file cannot be opened, a required field is missing, or no docket is
        returned by ``lookup_and_save``.
    :raises: The result of ``self.retry`` (20 minute countdown) when
        ``make_documents`` hits an IntegrityError or
        DocketEntry.MultipleObjectsReturned.
    """
    nothing_parsed = []
    docket_path = os.path.join(settings.MEDIA_ROOT, 'recap', filename)

    try:
        pacer_doc = PacerXMLParser(docket_path)
    except IOError:
        logger.warning("Unable to find the docket at: %s" % docket_path)
        return nothing_parsed

    # Stop at the first required attribute that is empty or falsy.
    for field in ('case_name', 'date_filed'):
        if getattr(pacer_doc, field):
            continue
        logger.error("Missing required field: %s" % field)
        return nothing_parsed

    docket = lookup_and_save(pacer_doc, debug=debug)
    if docket is None:
        return nothing_parsed

    try:
        recap_pks = pacer_doc.make_documents(docket, debug=debug)
    except (IntegrityError, DocketEntry.MultipleObjectsReturned) as exc:
        raise self.retry(exc=exc, countdown=20 * 60)

    # Parties are only built once the documents were created without error.
    pacer_doc.make_parties(docket, debug=debug)
    return recap_pks