Exemple #1
0
def parse_recap_docket(self, filename, debug=False):
    """Parse one RECAP docket file and create or update its items.

    :param filename: Name of the docket file; it is resolved against
        ``settings.MEDIA_ROOT/recap``.
    :param debug: Forwarded unchanged to ``lookup_and_save``,
        ``make_documents`` and ``make_parties``.
    :return: Whatever ``make_documents`` returns (presumably the pks of the
        RECAP documents it touched), or an empty list when the file cannot
        be opened, a required field is empty, or no docket is returned by
        ``lookup_and_save``.
    """
    path_to_docket = os.path.join(settings.MEDIA_ROOT, 'recap', filename)

    try:
        parsed = PacerXMLParser(path_to_docket)
    except IOError:
        logger.warning("Unable to find the docket at: %s" % path_to_docket)
        return []

    # Refuse to import anything that lacks the fields we key dockets on.
    for name in ('case_name', 'date_filed'):
        if not getattr(parsed, name):
            logger.error("Missing required field: %s" % name)
            return []

    docket = lookup_and_save(parsed, debug=debug)
    if docket is None:
        return []

    try:
        pks = parsed.make_documents(docket, debug=debug)
    except (IntegrityError, DocketEntry.MultipleObjectsReturned) as exc:
        # Ask for the task to be retried in 20 minutes.
        raise self.retry(exc=exc, countdown=20 * 60)

    # Parties are only built once the documents were created without error.
    parsed.make_parties(docket, debug=debug)
    return pks
    def parse_items(self):
        """Parse every docket returned by ``get_docket_list()``, in order.

        Options read from ``self.options``:
          - ``start_item``: 1-based position of the first docket to parse;
            earlier dockets are skipped.
          - ``max_items``: stop once this many dockets have been counted;
            ``-1`` disables the limit.

        ``self.debug`` is forwarded to ``lookup_and_save``,
        ``make_documents`` and ``make_parties``.

        A docket with an empty ``case_name`` or ``date_filed`` is logged and
        NOT imported. It is still counted in ``completed`` so the positions
        written to the log keep lining up with ``start_item``.
        """
        required_fields = ['case_name', 'date_filed']
        docket_paths = get_docket_list()

        completed = 0
        for docket_path in docket_paths:
            if completed < self.options['start_item'] - 1:
                # Skip ahead if start_item is provided.
                completed += 1
                continue

            logger.info("%s: Parsing docket: %s" %
                        (completed, docket_path))

            pacer_doc = PacerXMLParser(docket_path)

            # Collect the missing fields first: a `continue` placed inside
            # a loop over the fields would only advance that inner loop and
            # the incomplete docket would still be imported below.
            missing_fields = [field for field in required_fields
                              if not getattr(pacer_doc, field)]
            for field in missing_fields:
                logger.error("Missing required field: %s" % field)

            if not missing_fields:
                docket = lookup_and_save(pacer_doc, self.debug)
                if docket is not None:
                    pacer_doc.make_documents(docket, self.debug)
                    pacer_doc.make_parties(docket, self.debug)

            completed += 1

            max_items = self.options['max_items']
            if max_items != -1 and completed >= max_items:
                logger.info("\n\nCompleted %s items. Aborting early." %
                            max_items)
                break
    def parse_items(self):
        """Parse each docket from ``get_docket_list()`` sequentially.

        ``self.options['start_item']`` is the 1-based position of the first
        docket to parse (earlier ones are skipped) and
        ``self.options['max_items']`` caps how many dockets are counted
        before stopping (``-1`` means no cap). ``self.debug`` is passed on
        to ``lookup_and_save``, ``make_documents`` and ``make_parties``.

        Dockets whose ``case_name`` or ``date_filed`` is empty are logged
        and skipped rather than imported; they still advance ``completed``
        so logged positions stay aligned with ``start_item``.
        """
        required_fields = ['case_name', 'date_filed']
        docket_paths = get_docket_list()

        completed = 0
        for docket_path in docket_paths:
            if completed < self.options['start_item'] - 1:
                # Skip ahead if start_item is provided.
                completed += 1
                continue

            logger.info("%s: Parsing docket: %s" % (completed, docket_path))

            pacer_doc = PacerXMLParser(docket_path)

            # Validate before importing. A bare `continue` inside a loop
            # over the required fields only moves that inner loop along, so
            # the decision to skip has to be made out here.
            is_complete = True
            for field in required_fields:
                if not getattr(pacer_doc, field):
                    logger.error("Missing required field: %s" % field)
                    is_complete = False

            if is_complete:
                docket = lookup_and_save(pacer_doc, self.debug)
                if docket is not None:
                    pacer_doc.make_documents(docket, self.debug)
                    pacer_doc.make_parties(docket, self.debug)

            completed += 1

            max_items = self.options['max_items']
            if max_items != -1 and completed >= max_items:
                print("\n\nCompleted %s items. Aborting early." % max_items)
                break
Exemple #4
0
def parse_recap_docket(self, filename, debug=False):
    """Load a docket from the ``recap`` media directory and import it.

    The file is parsed with ``PacerXMLParser``, checked for a non-empty
    ``case_name`` and ``date_filed``, handed to ``lookup_and_save``, and
    then its documents and parties are built. ``debug`` is passed through
    to each of those helpers.

    Returns the result of ``make_documents`` on success. Returns an empty
    list if the file cannot be read, a required field is empty, or
    ``lookup_and_save`` yields ``None``. An ``IntegrityError`` or
    ``DocketEntry.MultipleObjectsReturned`` raised while making documents
    triggers ``self.retry`` with a 20 minute countdown.
    """
    pks = []
    xml_path = os.path.join(settings.MEDIA_ROOT, 'recap', filename)

    try:
        parser = PacerXMLParser(xml_path)
    except IOError:
        logger.warning("Unable to find the docket at: %s" % xml_path)
        return pks

    for required in ['case_name', 'date_filed']:
        if getattr(parser, required):
            continue
        # First empty field aborts the import.
        logger.error("Missing required field: %s" % required)
        return pks

    saved_docket = lookup_and_save(parser, debug=debug)
    if saved_docket is not None:
        try:
            pks = parser.make_documents(saved_docket, debug=debug)
        except (IntegrityError, DocketEntry.MultipleObjectsReturned) as err:
            raise self.retry(exc=err, countdown=20 * 60)
        # Reached only when make_documents did not raise.
        parser.make_parties(saved_docket, debug=debug)

    return pks
Exemple #5
0
def process_free_opinion_result(self, row_pk, cnt):
    """Process a single result from the free opinion report.

    Loads the ``PACERFreeDocumentRow`` with pk ``row_pk``, normalizes its
    court and case name, then creates or updates the matching docket,
    docket entry and RECAP document inside one transaction.

    :param row_pk: Primary key of the ``PACERFreeDocumentRow`` to process.
    :param cnt: Object providing ``make_case_name_short(case_name)``.
    :return: A dict with keys ``result``, ``rd_pk`` and ``pacer_court_id``
        when the document still needs further handling. ``None`` on every
        path where processing stops here (docket could not be created,
        database error, retries exhausted, or the document already exists
        and is available); on those paths ``self.request.callbacks`` is
        cleared so tasks linked after this one are not run with a ``None``
        argument.
    :raises: Whatever ``self.retry`` raises, when an ``IntegrityError``
        occurs and retries remain.
    """
    result = PACERFreeDocumentRow.objects.get(pk=row_pk)
    result.court = Court.objects.get(pk=map_pacer_to_cl_id(result.court_id))
    result.case_name = harmonize(result.case_name)
    result.case_name_short = cnt.make_case_name_short(result.case_name)

    # Work on a shallow copy so the attributes removed below remain
    # available on `result` (they are read again further down).
    row_copy = copy.copy(result)
    # If we don't do this, the doc's date_filed becomes the docket's
    # date_filed. Bad.
    delattr(row_copy, 'date_filed')
    # If we don't do this, we get the PACER court id and it crashes
    delattr(row_copy, 'court_id')
    # If we don't do this, the id of result tries to smash that of the docket.
    delattr(row_copy, 'id')
    try:
        with transaction.atomic():
            docket = lookup_and_save(row_copy)
            if not docket:
                msg = "Unable to create docket for %s" % result
                logger.error(msg)
                result.error_msg = msg
                result.save()
                self.request.callbacks = None
                return
            docket.blocked, docket.date_blocked = get_blocked_status(docket)
            docket.save()

            de, de_created = DocketEntry.objects.update_or_create(
                docket=docket,
                entry_number=result.document_number,
                defaults={
                    'date_filed': result.date_filed,
                    'description': result.description,
                })
            rd, rd_created = RECAPDocument.objects.update_or_create(
                docket_entry=de,
                document_number=result.document_number,
                attachment_number=None,
                defaults={
                    'pacer_doc_id': result.pacer_doc_id,
                    'document_type': RECAPDocument.PACER_DOCUMENT,
                    'is_free_on_pacer': True,
                })
    except IntegrityError as e:
        msg = "Raised IntegrityError: %s" % e
        logger.error(msg)
        if self.request.retries == self.max_retries:
            # Out of retries: record the failure and stop the chain, the
            # same way every other abort path in this function does.
            result.error_msg = msg
            result.save()
            self.request.callbacks = None
            return
        raise self.retry(exc=e)
    except DatabaseError as e:
        msg = "Unable to complete database transaction:\n%s" % e
        logger.error(msg)
        result.error_msg = msg
        result.save()
        self.request.callbacks = None
        return

    if not rd_created and rd.is_available:
        # The item already exists and is available. Fantastic, mark it as free,
        # and call it a day.
        rd.is_free_on_pacer = True
        rd.save()
        result.delete()
        self.request.callbacks = None
        return

    return {
        'result': result,
        'rd_pk': rd.pk,
        'pacer_court_id': result.court_id
    }
Exemple #6
0
 def setUp(self):
     """Parse the docket at ``DOCKET_PATH`` and run ``lookup_and_save`` on it."""
     parser = PacerXMLParser(self.DOCKET_PATH)
     self.pacer_doc = parser
     self.docket = lookup_and_save(parser, debug=False)
Exemple #7
0
 def setUp(self):
     """Build the parsed docket and its looked-up docket for each test."""
     parsed_docket = PacerXMLParser(self.DOCKET_PATH)
     self.pacer_doc = parsed_docket
     # debug=False is passed explicitly, as in the original fixture.
     self.docket = lookup_and_save(parsed_docket, debug=False)
Exemple #8
0
def process_free_opinion_result(self, row_pk, cnt):
    """Process a single result from the free opinion report.

    Fetches the ``PACERFreeDocumentRow`` identified by ``row_pk``, resolves
    its court, harmonizes its case name, and then, in a single transaction,
    looks up or saves the docket and upserts the docket entry and RECAP
    document for it.

    :param row_pk: Primary key of the ``PACERFreeDocumentRow`` to process.
    :param cnt: Object providing ``make_case_name_short(case_name)``.
    :return: ``{'result': ..., 'rd_pk': ..., 'pacer_court_id': ...}`` when
        the document needs further handling; otherwise ``None``. Every
        ``None`` return also clears ``self.request.callbacks`` so that
        tasks linked after this one do not run.
    :raises: Whatever ``self.retry`` raises, when an ``IntegrityError``
        occurs and retries remain.
    """
    result = PACERFreeDocumentRow.objects.get(pk=row_pk)
    result.court = Court.objects.get(pk=map_pacer_to_cl_id(result.court_id))
    result.case_name = harmonize(result.case_name)
    result.case_name_short = cnt.make_case_name_short(result.case_name)

    # Shallow copy: the attributes stripped below must stay on `result`,
    # which is still read after the docket has been saved.
    row_copy = copy.copy(result)
    # If we don't do this, the doc's date_filed becomes the docket's
    # date_filed. Bad.
    delattr(row_copy, 'date_filed')
    # If we don't do this, we get the PACER court id and it crashes
    delattr(row_copy, 'court_id')
    # If we don't do this, the id of result tries to smash that of the docket.
    delattr(row_copy, 'id')
    try:
        with transaction.atomic():
            docket = lookup_and_save(row_copy)
            if not docket:
                msg = "Unable to create docket for %s" % result
                logger.error(msg)
                result.error_msg = msg
                result.save()
                self.request.callbacks = None
                return
            docket.blocked, docket.date_blocked = get_blocked_status(docket)
            docket.save()

            de, de_created = DocketEntry.objects.update_or_create(
                docket=docket,
                entry_number=result.document_number,
                defaults={
                    'date_filed': result.date_filed,
                    'description': result.description,
                }
            )
            rd, rd_created = RECAPDocument.objects.update_or_create(
                docket_entry=de,
                document_number=result.document_number,
                attachment_number=None,
                defaults={
                    'pacer_doc_id': result.pacer_doc_id,
                    'document_type': RECAPDocument.PACER_DOCUMENT,
                    'is_free_on_pacer': True,
                }
            )
    except IntegrityError as e:
        msg = "Raised IntegrityError: %s" % e
        logger.error(msg)
        if self.request.retries == self.max_retries:
            # Retries are exhausted: persist the error and clear the
            # callbacks, matching the other terminal paths here, so the
            # follow-up tasks are not invoked with a None result.
            result.error_msg = msg
            result.save()
            self.request.callbacks = None
            return
        raise self.retry(exc=e)
    except DatabaseError as e:
        msg = "Unable to complete database transaction:\n%s" % e
        logger.error(msg)
        result.error_msg = msg
        result.save()
        self.request.callbacks = None
        return

    if not rd_created and rd.is_available:
        # The item already exists and is available. Fantastic, mark it as free,
        # and call it a day.
        rd.is_free_on_pacer = True
        rd.save()
        result.delete()
        self.request.callbacks = None
        return

    return {'result': result, 'rd_pk': rd.pk, 'pacer_court_id': result.court_id}