def parse_items(self):
        """For every item in the directory, send it to Celery for processing"""
        docket_paths = get_docket_list()

        completed = 0
        for docket_path in docket_paths:
            if completed < self.options['start_item'] - 1:
                # Skip ahead if start_item is provided.
                completed += 1
                continue
            else:
                logger.info("%s: Parsing docket: %s" %
                            (completed, docket_path))

                pacer_doc = PacerXMLParser(docket_path)
                required_fields = ['case_name', 'date_filed']
                missing_fields = [f for f in required_fields
                                  if not getattr(pacer_doc, f)]
                if missing_fields:
                    logger.error("Missing required field(s): %s" %
                                 ", ".join(missing_fields))
                    # Skip this docket, but still count it toward progress.
                    completed += 1
                    continue

                docket = lookup_and_save(pacer_doc, self.debug)
                if docket is not None:
                    pacer_doc.make_documents(docket, self.debug)
                    pacer_doc.make_parties(docket, self.debug)

                completed += 1

                max_items = self.options['max_items']
                if max_items != -1 and completed >= max_items:
                    logger.info("\n\nCompleted %s items. Aborting early." %
                                max_items)
                    break
def down_for_only_me(session, url):
    """Check if a URL is down just our server, or globally

    :return: True if the url is only down for me, or False if entirely up or
    entirely down.
    """
    try:
        check_and_log_url(session, url)
    except requests.RequestException:
        # Down from our server. Try from our proxy.
        try:
            proxy_response = check_if_global_outage(session, url)
        except requests.RequestException as e:
            logger.error("Problem hitting proxy: %s", e)
            raise e

        j = proxy_response.json()
        if j["status_code"] is not None:
            # Down from our server, but up from our proxy. Yikes!
            return True
        else:
            # Down from our server, and down from our proxy. OK.
            return False

    # Up from our server. OK.
    return False
    def sample_dockets(self):
        """Iterate over `node_count` items and extract the value at the XPath.

        If there are not `node_count` recap dockets on disk, do the lesser of
        the two.
        """
        docket_paths = get_docket_list(self.options['path'])
        random.shuffle(docket_paths)

        completed = 0
        no_value = 0
        errors = 0
        c = Counter()
        for docket_path in docket_paths:
            with open(docket_path, 'r') as f:
                docket_xml_content = f.read()

                if not docket_xml_content:
                    continue

            # Extract the xpath value
            try:
                tree = etree.fromstring(docket_xml_content)
            except XMLSyntaxError:
                errors += 1
                continue
            try:
                values = tree.xpath(self.options['xpath'])
            except XPathEvalError:
                logger.error("Invalid XPath expression.")
                exit(1)

            if values:
                logger.info("%s: %s" % (completed, values))
                c.update([str(v) for v in values])
                completed += 1
            else:
                no_value += 1

            if completed == self.options['sample_size']:
                break

        with open('sample.pkl', 'wb') as f:
            pickle.dump(c, f)
        logger.info('\n%s items had no value. %s errors. Sample saved at '
                    '"sample.pkl"' % (no_value, errors))
def iterate_and_log_courts(courts):
    session = requests.Session()
    for court in courts:
        url = make_simple_url(court)
        logger.info("Checking url for %s: %s", court.pk, url)
        t1 = now()
        max_tries = 3
        try_number = 1
        while try_number <= max_tries:
            down_for_me = down_for_only_me(session, url)
            if not down_for_me:
                break
            try_number += 1
        else:
            # The while loop finished without a `break`, i.e. all `max_tries`
            # attempts found the site down just for us. Oof.
            # Use % instead of logging params to bypass Sentry issue grouping
            logger.error(
                "After %s seconds and %s tries, failed to access %s's PACER "
                "website from our server, but got it via our proxy each time."
                % ((now() - t1).seconds, max_tries, court.pk))
def add_directory(options):
    """Import JSON files from a directory provided at the command line.

    Use glob.glob to identify JSON files to import.

    :return: None
    """
    dir_glob = options["directory_glob"]
    skip_until = options["skip_until"]
    if dir_glob is None:
        print("--directory-glob is a required parameter when the "
              "'add-directory' action is selected.")
    else:
        dir_glob = options["directory_glob"]
        fps = sorted(glob(dir_glob))
        if skip_until:
            # Remove items from the list until the skip_until value is hit.
            try:
                skip_index = fps.index(skip_until)
                fps = fps[skip_index:]
            except ValueError:
                logger.error(
                    "Unable to find '%s' in directory_glob: '%s'. "
                    "The first few items of the glob look like: \n  "
                    "%s",
                    skip_until,
                    dir_glob,
                    "\n  ".join(fps[0:3]),
                )
                raise

        q = options["queue"]
        throttle = CeleryThrottle(queue_name=q)
        for fp in fps:
            throttle.maybe_wait()
            logger.info("Adding LASC JSON file at: %s", fp)
            tasks.add_case_from_filepath.apply_async(kwargs={"filepath": fp},
                                                     queue=q)
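# A short, self-contained sketch (not from the original project) of the
# `skip_until` trimming that add_directory() performs above: drop every path
# before the first match and keep the rest; list.index raises ValueError if
# the value is absent, which the code above logs and re-raises.
def trim_until(paths, skip_until):
    """Return `paths` starting at the first occurrence of `skip_until`."""
    return paths[paths.index(skip_until):]

# e.g. trim_until(['a.json', 'b.json', 'c.json'], 'b.json')
# -> ['b.json', 'c.json']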
def get_and_save_free_document_reports(options):
    """Query the Free Doc Reports on PACER and get a list of all the free
    documents. Do not download those items, as that step is done later.
    """
    # Kill any *old* logs that report they're in progress. (They've failed.)
    twelve_hrs_ago = now() - timedelta(hours=12)
    PACERFreeDocumentLog.objects.filter(
        date_started__lt=twelve_hrs_ago,
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
    ).update(status=PACERFreeDocumentLog.SCRAPE_FAILED)

    cl_court_ids = Court.objects.filter(
        jurisdiction__in=[Court.FEDERAL_DISTRICT, Court.FEDERAL_BANKRUPTCY],
        in_use=True,
        end_date=None,
    ).exclude(
        pk__in=['casb', 'ganb', 'gub', 'innb', 'mieb', 'miwb', 'nmib', 'nvb',
                'ohsb', 'prb', 'tnwb', 'vib'],
    ).values_list(
        'pk',
        flat=True,
    )
    pacer_court_ids = {
        map_cl_to_pacer_id(v): {
            'until': now(),
            'count': 1,
            'result': None
        }
        for v in cl_court_ids
    }
    pacer_session = PacerSession(username=PACER_USERNAME,
                                 password=PACER_PASSWORD)
    pacer_session.login()

    # Iterate over every court, X days at a time. As courts are completed,
    # remove them from the list of courts to process until none are left
    tomorrow = now() + timedelta(days=1)
    while len(pacer_court_ids) > 0:
        court_ids_copy = pacer_court_ids.copy()  # Make a copy of the list.
        for pacer_court_id, delay in court_ids_copy.items():
            if now() < delay['until']:
                # Do other courts until the delay is up. Do not print/log
                # anything since at the end there will only be one court left.
                continue

            next_start_date, next_end_date = get_next_date_range(
                pacer_court_id)
            if delay['result'] is not None:
                if delay['result'].ready():
                    result = delay['result'].get()
                    if result == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL:
                        if next_start_date >= tomorrow.date():
                            logger.info("Finished '%s'. Marking it complete." %
                                        pacer_court_id)
                            pacer_court_ids.pop(pacer_court_id, None)
                            continue

                    elif result == PACERFreeDocumentLog.SCRAPE_FAILED:
                        logger.error("Encountered critical error on %s "
                                     "(network error?). Marking as failed and "
                                     "pressing on." % pacer_court_id)
                        pacer_court_ids.pop(pacer_court_id, None)
                        continue
                else:
                    next_delay = min(delay['count'] * 5, 30)  # backoff w/cap
                    logger.info(
                        "Court %s still in progress. Delaying at least "
                        "%ss." % (pacer_court_id, next_delay))
                    pacer_court_ids[pacer_court_id]['until'] = (
                        now() + timedelta(seconds=next_delay))
                    pacer_court_ids[pacer_court_id]['count'] += 1
                    continue

            mark_court_in_progress(pacer_court_id, next_end_date)
            pacer_court_ids[pacer_court_id]['count'] = 1  # Reset
            delay['result'] = chain(
                get_and_save_free_document_report.si(pacer_court_id,
                                                     next_start_date,
                                                     next_end_date,
                                                     pacer_session),
                mark_court_done_on_date.s(pacer_court_id, next_end_date),
            ).apply_async()
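# Illustration (not from the original project) of the capped linear backoff
# used above, next_delay = min(delay['count'] * 5, 30): repeated polls of a
# court that is still in progress wait 5, 10, 15, 20, 25, 30, 30, ... seconds.
def backoff_seconds(count, step=5, cap=30):
    """Seconds to wait before the next poll, given how many polls so far."""
    return min(count * step, cap)

# [backoff_seconds(n) for n in range(1, 8)] -> [5, 10, 15, 20, 25, 30, 30]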
def parse_harvard_opinions(reporter, volume, make_searchable):
    """
    Parse downloaded CaseLaw Corpus from internet archive and add them to our
    database.

    Optionally uses a reporter abbreviation to identify cases to download as
    used by IA.  (Ex. T.C. => tc)

    Optionally uses a volume integer.

    If neither is provided, code will cycle through all downloaded files.

    :param reporter: The reporter abbreviation, slugified (optional), e.g.
    "tc" for T.C.
    :param volume: The volume number (int) of the reporter (optional), e.g. 10
    :param make_searchable: Whether to add the saved opinions to Solr
    :return: None
    """
    if not reporter and volume:
        logger.error("You provided a volume but no reporter. Exiting.")
        return

    for file_path in filepath_list(reporter, volume):
        ia_download_url = "/".join(
            ["https://archive.org/download", file_path.split("/", 9)[-1]]
        )

        if OpinionCluster.objects.filter(
            filepath_json_harvard=file_path
        ).exists():
            logger.info("Skipping - already in system %s" % ia_download_url)
            continue

        try:
            with open(file_path) as f:
                data = json.load(f)
        except ValueError:
            logger.warning("Empty json: missing case at: %s" % ia_download_url)
            continue
        except Exception as e:
            logger.warning("Unknown error %s for: %s" % (e, ia_download_url))
            continue

        cites = get_citations(data["citations"][0]["cite"])
        if not cites:
            logger.info(
                "No citation found for %s." % data["citations"][0]["cite"]
            )
            continue

        case_name = harmonize(data["name_abbreviation"])
        case_name_short = cnt.make_case_name_short(case_name)
        case_name_full = harmonize(data["name"])

        citation = cites[0]
        if skip_processing(citation, case_name, file_path):
            continue

        # TODO: Generalize this to handle all court types somehow.
        court_id = match_court_string(
            data["court"]["name"],
            state=True,
            federal_appeals=True,
            federal_district=True,
        )

        soup = BeautifulSoup(data["casebody"]["data"], "lxml")

        # Some documents contain images in the HTML
        # Flag them for a later crawl by using the placeholder '[[Image]]'
        judge_list = [
            extract_judge_last_name(x.text) for x in soup.find_all("judges")
        ]
        author_list = [
            extract_judge_last_name(x.text) for x in soup.find_all("author")
        ]
        # Flatten and dedupe list of judges
        judges = ", ".join(
            sorted(
                list(
                    set(
                        itertools.chain.from_iterable(judge_list + author_list)
                    )
                )
            )
        )
        judges = titlecase(judges)
        docket_string = (
            data["docket_number"]
            .replace("Docket No.", "")
            .replace("Docket Nos.", "")
            .strip()
        )

        short_fields = ["attorneys", "disposition", "otherdate", "seealso"]

        long_fields = [
            "syllabus",
            "summary",
            "history",
            "headnotes",
            "correction",
        ]

        short_data = parse_extra_fields(soup, short_fields, False)
        long_data = parse_extra_fields(soup, long_fields, True)

        with transaction.atomic():
            logger.info("Adding docket for: %s", citation.base_citation())
            docket = Docket(
                case_name=case_name,
                case_name_short=case_name_short,
                case_name_full=case_name_full,
                docket_number=docket_string,
                court_id=court_id,
                source=Docket.HARVARD,
                ia_needs_upload=False,
            )
            try:
                with transaction.atomic():
                    docket.save()
            except OperationalError as e:
                if "exceeds maximum" in str(e):
                    docket.docket_number = (
                        "%s, See Corrections for full Docket Number"
                        % trunc(docket_string, length=5000, ellipsis="...")
                    )
                    docket.save()
                    long_data["correction"] = "%s <br> %s" % (
                        data["docket_number"],
                        long_data["correction"],
                    )
            # Handle partial dates by adding -01 to YYYY-MM dates
            date_filed, is_approximate = validate_dt(data["decision_date"])

            logger.info("Adding cluster for: %s", citation.base_citation())
            cluster = OpinionCluster(
                case_name=case_name,
                case_name_short=case_name_short,
                case_name_full=case_name_full,
                precedential_status="Published",
                docket_id=docket.id,
                source="U",
                date_filed=date_filed,
                date_filed_is_approximate=is_approximate,
                attorneys=short_data["attorneys"],
                disposition=short_data["disposition"],
                syllabus=long_data["syllabus"],
                summary=long_data["summary"],
                history=long_data["history"],
                other_dates=short_data["otherdate"],
                cross_reference=short_data["seealso"],
                headnotes=long_data["headnotes"],
                correction=long_data["correction"],
                judges=judges,
                filepath_json_harvard=file_path,
            )
            cluster.save(index=False)

            logger.info("Adding citation for: %s", citation.base_citation())
            Citation.objects.create(
                volume=citation.volume,
                reporter=citation.reporter,
                page=citation.page,
                type=map_reporter_db_cite_type(
                    REPORTERS[citation.canonical_reporter][0]["cite_type"]
                ),
                cluster_id=cluster.id,
            )
            new_op_pks = []
            for op in soup.find_all("opinion"):
                # Strip page-number tags out of the author tag so its text is
                # clean for processing. This is particularly useful for
                # identifying Per Curiam opinions.
                elem = op.find("author")
                if elem is not None:
                    for x in elem.find_all("page-number"):
                        x.extract()

                auth = op.find("author")
                if auth is not None:
                    author_tag_str = titlecase(auth.text.strip(":"))
                    author_str = titlecase(
                        "".join(extract_judge_last_name(author_tag_str))
                    )
                else:
                    author_str = ""
                    author_tag_str = ""

                per_curiam = author_tag_str == "Per Curiam"
                # If it is a Per Curiam opinion, use that as the author string
                if per_curiam:
                    author_str = "Per Curiam"

                op_type = map_opinion_type(op.get("type"))
                opinion_xml = str(op)
                logger.info("Adding opinion for: %s", citation.base_citation())
                op = Opinion(
                    cluster_id=cluster.id,
                    type=op_type,
                    author_str=author_str,
                    xml_harvard=opinion_xml,
                    per_curiam=per_curiam,
                    extracted_by_ocr=True,
                )
                # Don't index now; do so later if desired
                op.save(index=False)
                new_op_pks.append(op.pk)

        if make_searchable:
            add_items_to_solr.delay(new_op_pks, "search.Opinion")

        logger.info("Finished: %s", citation.base_citation())
def get_and_save_free_document_reports(options):
    """Query the Free Doc Reports on PACER and get a list of all the free
    documents. Do not download those items, as that step is done later. For now
    just get the list.

    Note that this uses synchronous celery chains. A previous version was more
    complex and did not use synchronous chains. Unfortunately in Celery 4.2.0,
    or more accurately in redis-py 3.x.x, doing it that way failed nearly every
    time.

    This is a simpler version, though a slower one, but it should get the job
    done.
    """
    # Kill any *old* logs that report they're in progress. (They've failed.)
    three_hrs_ago = now() - timedelta(hours=3)
    PACERFreeDocumentLog.objects.filter(
        date_started__lt=three_hrs_ago,
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
    ).update(status=PACERFreeDocumentLog.SCRAPE_FAILED)

    cl_court_ids = (
        Court.federal_courts.district_pacer_courts()
        .filter(in_use=True, end_date=None)
        .exclude(pk__in=["casb", "gub", "innb", "miwb", "ohsb", "prb"])
        .values_list("pk", flat=True)
    )
    pacer_court_ids = [map_cl_to_pacer_id(v) for v in cl_court_ids]
    today = now()
    for pacer_court_id in pacer_court_ids:
        while True:
            next_start_d, next_end_d = get_next_date_range(pacer_court_id)
            logger.info(
                "Attempting to get latest document references for "
                "%s between %s and %s",
                pacer_court_id,
                next_start_d,
                next_end_d,
            )
            mark_court_in_progress(pacer_court_id, next_end_d)
            try:
                status = get_and_save_free_document_report(
                    pacer_court_id,
                    next_start_d,
                    next_end_d,
                )
            except RequestException:
                logger.error(
                    "Failed to get document references for %s "
                    "between %s and %s due to network error.",
                    pacer_court_id,
                    next_start_d,
                    next_end_d,
                )
                mark_court_done_on_date(
                    PACERFreeDocumentLog.SCRAPE_FAILED,
                    pacer_court_id,
                    next_end_d,
                )
                break
            except IndexError:
                logger.error(
                    "Failed to get document references for %s "
                    "between %s and %s due to PACER 6.3 bug.",
                    pacer_court_id,
                    next_start_d,
                    next_end_d,
                )
                mark_court_done_on_date(
                    PACERFreeDocumentLog.SCRAPE_FAILED,
                    pacer_court_id,
                    next_end_d,
                )
                break
            else:
                result = mark_court_done_on_date(status, pacer_court_id,
                                                 next_end_d)

            if result == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL:
                if next_end_d >= today.date():
                    logger.info("Got all document references for '%s'.",
                                pacer_court_id)
                    # Break from while loop, onwards to next court
                    break
                else:
                    # More dates to do; let it continue
                    continue

            elif result == PACERFreeDocumentLog.SCRAPE_FAILED:
                logger.error("Encountered critical error on %s "
                             "(network error?). Marking as failed and "
                             "pressing on." % pacer_court_id)
                # Break from while loop, onwards to next court
                break
def parse_harvard_opinions(reporter, volume):
    """
    Parse downloaded CaseLaw Corpus from internet archive and add them to our
    database.

    Optionally uses a reporter abbreviation to identify cases to download as
    used by IA.  (Ex. T.C. => tc)

    Optionally uses a volume integer.

    If neither is provided, code will cycle through all downloaded files.

    :param reporter: The reporter abbreviation, slugified (optional), e.g.
    "tc" for T.C.
    :param volume: The volume number (int) of the reporter (optional), e.g. 10
    :return: None
    """
    if not reporter and volume:
        logger.error("You provided a volume but no reporter. Exiting.")
        return

    for file_path in filepath_list(reporter, volume):
        ia_download_url = "/".join(
            ["https://archive.org/download", file_path.split("/", 9)[-1]]
        )

        if OpinionCluster.objects.filter(
            filepath_json_harvard=file_path
        ).exists():
            logger.info("Skipping - already in system %s" % ia_download_url)
            continue

        try:
            with open(file_path) as f:
                data = json.load(f)
        except ValueError:
            logger.warning("Empty json: missing case at: %s" % ia_download_url)
            continue
        except Exception as e:
            logger.warning("Unknown error %s for: %s" % (e, ia_download_url))
            continue

        cites = get_citations(data["citations"][0]["cite"], html=False)
        if not cites:
            logger.info(
                "No citation found for %s." % data["citations"][0]["cite"]
            )
            continue

        case_name = harmonize(data["name_abbreviation"])
        case_name_short = cnt.make_case_name_short(case_name)
        case_name_full = harmonize(data["name"])

        citation = cites[0]
        if skip_processing(citation, case_name):
            continue

        # TODO: Generalize this to handle all court types somehow.
        court_id = match_court_string(
            data["court"]["name"],
            state=True,
            federal_appeals=True,
            federal_district=True,
        )

        soup = BeautifulSoup(data["casebody"]["data"], "lxml")

        # Some documents contain images in the HTML
        # Flag them for a later crawl by using the placeholder '[[Image]]'
        judge_list = [
            find_judge_names(x.text) for x in soup.find_all("judges")
        ]
        author_list = [
            find_judge_names(x.text) for x in soup.find_all("author")
        ]
        # Flatten and dedupe list of judges
        judges = ", ".join(
            list(set(itertools.chain.from_iterable(judge_list + author_list)))
        )
        judges = titlecase(judges)
        docket_string = (
            data["docket_number"]
            .replace("Docket No.", "")
            .replace("Docket Nos.", "")
            .strip()
        )

        with transaction.atomic():
            logger.info("Adding docket for: %s", citation.base_citation())
            docket = Docket.objects.create(
                case_name=case_name,
                case_name_short=case_name_short,
                case_name_full=case_name_full,
                docket_number=docket_string,
                court_id=court_id,
                source=Docket.HARVARD,
                ia_needs_upload=False,
            )
            # Iterate over the other XML fields in the Harvard data set and
            # save each as a pipe-delimited string for later processing.
            json_fields = [
                "attorneys",
                "disposition",
                "syllabus",
                "summary",
                "history",
                "otherdate",
                "seealso",
                "headnotes",
                "correction",
            ]
            data_set = {}
            for key in json_fields:
                data_set[key] = "|".join(x.text for x in soup.find_all(key))

            # Handle partial dates by adding -01 to YYYY-MM dates
            date_filed, is_approximate = validate_dt(data["decision_date"])

            logger.info("Adding cluster for: %s", citation.base_citation())
            cluster = OpinionCluster.objects.create(
                case_name=case_name,
                case_name_short=case_name_short,
                case_name_full=case_name_full,
                precedential_status="Published",
                docket_id=docket.id,
                source="U",
                date_filed=date_filed,
                date_filed_is_approximate=is_approximate,
                attorneys=data_set["attorneys"],
                disposition=data_set["disposition"],
                syllabus=data_set["syllabus"],
                summary=data_set["summary"],
                history=data_set["history"],
                other_dates=data_set["otherdate"],
                cross_reference=data_set["seealso"],
                headnotes=data_set["headnotes"],
                correction=data_set["correction"],
                judges=judges,
                filepath_json_harvard=file_path,
            )

            logger.info("Adding citation for: %s", citation.base_citation())
            Citation.objects.create(
                volume=citation.volume,
                reporter=citation.reporter,
                page=citation.page,
                type=map_reporter_db_cite_type(
                    REPORTERS[citation.reporter][0]["cite_type"]
                ),
                cluster_id=cluster.id,
            )
            for op in soup.find_all("opinion"):
                joined_by_str = titlecase(
                    " ".join(
                        list(set(itertools.chain.from_iterable(judge_list)))
                    )
                )
                author_str = titlecase(
                    " ".join(
                        list(set(itertools.chain.from_iterable(author_list)))
                    )
                )

                op_type = map_opinion_type(op.get("type"))
                opinion_xml = str(op)
                logger.info("Adding opinion for: %s", citation.base_citation())
                Opinion.objects.create(
                    cluster_id=cluster.id,
                    type=op_type,
                    author_str=author_str,
                    xml_harvard=opinion_xml,
                    joined_by_str=joined_by_str,
                    extracted_by_ocr=True,
                )

        logger.info("Finished: %s", citation.base_citation())
def get_and_save_free_document_reports(options):
    """Query the Free Doc Reports on PACER and get a list of all the free
    documents. Do not download those items, as that step is done later.
    """
    # Kill any *old* logs that report they're in progress. (They've failed.)
    three_hrs_ago = now() - timedelta(hours=3)
    PACERFreeDocumentLog.objects.filter(
        date_started__lt=three_hrs_ago,
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
    ).update(
        status=PACERFreeDocumentLog.SCRAPE_FAILED,
    )

    cl_court_ids = Court.objects.filter(
        jurisdiction__in=[Court.FEDERAL_DISTRICT,
                          Court.FEDERAL_BANKRUPTCY],
        in_use=True,
        end_date=None,
    ).exclude(
        pk__in=['casb', 'gub', 'innb', 'miwb', 'ohsb', 'prb'],
    ).values_list(
        'pk',
        flat=True,
    )
    pacer_court_ids = {
        map_cl_to_pacer_id(v): {'until': now(), 'count': 1, 'result': None}
        for v in cl_court_ids
    }
    pacer_session = PacerSession(username=PACER_USERNAME,
                                 password=PACER_PASSWORD)
    pacer_session.login()

    # Iterate over every court, X days at a time. As courts are completed,
    # remove them from the list of courts to process until none are left
    today = now()
    max_delay_count = 20
    while len(pacer_court_ids) > 0:
        court_ids_copy = pacer_court_ids.copy()  # Make a copy of the list.
        for pacer_court_id, delay in court_ids_copy.items():
            if now() < delay['until']:
                # Do other courts until the delay is up. Do not print/log
                # anything since at the end there will only be one court left.
                continue

            next_start_d, next_end_d = get_next_date_range(pacer_court_id)
            if delay['result'] is not None:
                if delay['result'].ready():
                    result = delay['result'].get()
                    if result == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL:
                        if next_end_d >= today.date():
                            logger.info("Finished '%s'. Marking it complete." %
                                        pacer_court_id)
                            pacer_court_ids.pop(pacer_court_id, None)
                            continue

                    elif result == PACERFreeDocumentLog.SCRAPE_FAILED:
                        logger.error("Encountered critical error on %s "
                                     "(network error?). Marking as failed and "
                                     "pressing on." % pacer_court_id)
                        pacer_court_ids.pop(pacer_court_id, None)
                        continue
                else:
                    if delay['count'] > max_delay_count:
                        logger.error("Something went wrong and we weren't "
                                     "able to finish %s. We ran out of time." %
                                     pacer_court_id)
                        pacer_court_ids.pop(pacer_court_id, None)
                        continue
                    next_delay = min(delay['count'] * 5, 30)  # backoff w/cap
                    logger.info("Court %s still in progress. Delaying at "
                                "least %ss." % (pacer_court_id, next_delay))
                    delay_until = now() + timedelta(seconds=next_delay)
                    pacer_court_ids[pacer_court_id]['until'] = delay_until
                    pacer_court_ids[pacer_court_id]['count'] += 1
                    continue

            mark_court_in_progress(pacer_court_id, next_end_d)
            pacer_court_ids[pacer_court_id]['count'] = 1  # Reset
            delay['result'] = chain(
                get_and_save_free_document_report.si(
                    pacer_court_id,
                    next_start_d,
                    next_end_d,
                    pacer_session.cookies,
                ),
                mark_court_done_on_date.s(pacer_court_id, next_end_d),
            ).apply_async()
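# Note on the chain above (standard Celery semantics, not project-specific):
# .si() builds an immutable signature, so get_and_save_free_document_report
# ignores any parent result, while .s() builds a regular signature, so
# mark_court_done_on_date receives the report task's return value (the scrape
# status) prepended to the arguments listed here.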