Ejemplo n.º 1
0
def get_and_save_free_document_report(self, court_id, start, end, cookies):
    """Download the Free document report and save it to the DB.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a
    logged-in PACER user.
    :return: PACERFreeDocumentLog.SCRAPE_SUCCESSFUL on success, or
    PACERFreeDocumentLog.SCRAPE_FAILED once retries are exhausted.
    """
    s = PacerSession(cookies=cookies,
                     username=settings.PACER_USERNAME,
                     password=settings.PACER_PASSWORD)
    report = FreeOpinionReport(court_id, s)
    try:
        report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ReadTimeout, ConnectTimeout) as exc:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=5)
    except SoftTimeLimitExceeded as exc:
        logger.warning("Soft time limit exceeded at %s. %s retries remain.",
                       court_id, (self.max_retries - self.request.retries))
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=5)

    try:
        results = report.data
    except (IndexError, HTTPError) as exc:
        # IndexError: When the page isn't downloaded properly.
        # HTTPError: raise_for_status in parse hit bad status.
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=5)

    for row in results:
        try:
            PACERFreeDocumentRow.objects.create(
                court_id=row.court_id,
                pacer_case_id=row.pacer_case_id,
                docket_number=row.docket_number,
                case_name=row.case_name,
                date_filed=row.date_filed,
                pacer_doc_id=row.pacer_doc_id,
                document_number=row.document_number,
                description=row.description,
                nature_of_suit=row.nature_of_suit,
                cause=row.cause,
            )
        except IntegrityError:
            # Duplicate row (e.g. a re-run over the same range); skip it
            # rather than aborting the whole scrape.
            continue

    return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL
Ejemplo n.º 2
0
def get_and_save_free_document_report(self, court_id, start, end, session):
    """Download the Free document report and save it to the DB.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param session: A PACER Session object
    :return: PACERFreeDocumentLog.SCRAPE_SUCCESSFUL on success, or
    PACERFreeDocumentLog.SCRAPE_FAILED once retries are exhausted.
    """
    report = FreeOpinionReport(court_id, session)
    try:
        report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ReadTimeout, ConnectTimeout) as exc:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=10)

    try:
        results = report.data
    except (IndexError, HTTPError) as exc:
        # IndexError: When the page isn't downloaded properly.
        # HTTPError: raise_for_status in parse hit bad status.
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=10)

    for row in results:
        try:
            PACERFreeDocumentRow.objects.create(
                court_id=row.court_id,
                pacer_case_id=row.pacer_case_id,
                docket_number=row.docket_number,
                case_name=row.case_name,
                date_filed=row.date_filed,
                pacer_doc_id=row.pacer_doc_id,
                document_number=row.document_number,
                description=row.description,
                nature_of_suit=row.nature_of_suit,
                cause=row.cause,
            )
        except IntegrityError:
            # Duplicate for whatever reason; skip it.
            continue

    return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL
Ejemplo n.º 3
0
def get_free_document_report(self, court_id, start, end, session):
    """Get structured results from the PACER free document report.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param session: A PACER Session object
    :return: The report's parsed data. Retries the task (re-raising via
    self.retry) on network failures or on a badly-downloaded page.
    """
    report = FreeOpinionReport(court_id, session)
    try:
        report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ConnectTimeout, HTTPError) as exc:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        raise self.retry(exc=exc, countdown=5)

    try:
        return report.data
    except IndexError as exc:
        # Happens when the page isn't downloaded properly, ugh.
        raise self.retry(exc=exc, countdown=15)
Ejemplo n.º 4
0
def get_and_save_free_document_report(self, court_id, start, end, session):
    """Download the Free document report and save it to the DB.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param session: A PACER Session object
    :return: PACERFreeDocumentLog.SCRAPE_SUCCESSFUL on success, or
    PACERFreeDocumentLog.SCRAPE_FAILED once retries are exhausted.
    """
    report = FreeOpinionReport(court_id, session)
    try:
        responses = report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ReadTimeout, ConnectTimeout) as exc:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=10)

    try:
        results = report.parse(responses)
    except (IndexError, HTTPError) as exc:
        # IndexError: When the page isn't downloaded properly.
        # HTTPError: raise_for_status in parse hit bad status.
        if self.request.retries == self.max_retries:
            return PACERFreeDocumentLog.SCRAPE_FAILED
        raise self.retry(exc=exc, countdown=10)

    for row in results:
        try:
            PACERFreeDocumentRow.objects.create(
                court_id=row.court_id,
                pacer_case_id=row.pacer_case_id,
                docket_number=row.docket_number,
                case_name=row.case_name,
                date_filed=row.date_filed,
                pacer_doc_id=row.pacer_doc_id,
                document_number=row.document_number,
                description=row.description,
                nature_of_suit=row.nature_of_suit,
                cause=row.cause,
            )
        except IntegrityError:
            # Duplicate for whatever reason; skip it.
            continue

    return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL
Ejemplo n.º 5
0
def get_free_document_report(self, court_id, start, end, session):
    """Get structured results from the PACER free document report.

    :param self: The Celery task.
    :param court_id: A pacer court id.
    :param start: a date object representing the first day to get results.
    :param end: a date object representing the last day to get results.
    :param session: A PACER Session object
    :return: The parsed report data. Retries the task (re-raising via
    self.retry) on network failures or on a badly-downloaded page.
    """
    report = FreeOpinionReport(court_id, session)
    try:
        responses = report.query(start, end, sort='case_number')
    except (ConnectionError, ChunkedEncodingError, ReadTimeoutError,
            ConnectTimeout) as exc:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.warning("Unable to get free document report results from %s "
                       "(%s to %s). Trying again.", court_id, start, end)
        raise self.retry(exc=exc, countdown=5)

    try:
        return report.parse(responses)
    except IndexError as exc:
        # Happens when the page isn't downloaded properly, ugh.
        raise self.retry(exc=exc, countdown=15)