Esempio n. 1
0
def fetch_filings(afterdate: str, beforedate: str, case_num_prefix: str) -> List[str]:
    """Get filing case numbers between afterdate and beforedate and starting with case_num_prefix.

    The filings search is attempted up to 10 times; the first attempt that is
    fetched and parsed without raising wins. If the parsed result reports that
    the query needs splitting, the date range is split in two and each half is
    fetched recursively, with the two result lists concatenated.

    Args:
        afterdate: Start of the date range, passed through to the search query.
        beforedate: End of the date range, passed through to the search query.
        case_num_prefix: Case number prefix, passed through to the search query.

    Returns:
        The list of case numbers found. An empty list is returned when every
        attempt failed (the failure is logged as an error).
    """
    max_attempts = 10

    logger.info(
        f"Scraping case numbers between {afterdate} and {beforedate} "
        f"for prefix {case_num_prefix}..."
    )

    # Bind the results up front so that exhausting every attempt falls through
    # to an empty result instead of raising UnboundLocalError further down.
    filings_case_nums_list: List[str] = []
    query_needs_splitting = False

    for attempt in range(1, max_attempts + 1):
        try:
            filings_page_content = fetch_page.query_filings(
                afterdate, beforedate, case_num_prefix
            )
            filings_soup = BeautifulSoup(filings_page_content, "html.parser")
            filings_case_nums_list, query_needs_splitting = get_filing_case_nums(filings_soup)
            break
        except Exception:
            # `Exception` rather than a bare `except` so that KeyboardInterrupt
            # and SystemExit are not swallowed and retried.
            if attempt == max_attempts:
                logger.error("Failed to find case numbers on all 10 attempts.")

    # handle case of too many results (200 results means that the search cut off)
    if query_needs_splitting:
        try:
            end_of_first_range, start_of_second_range = split_date_range(
                afterdate, beforedate
            )
        except ValueError:
            # Only the split itself is guarded: the message below is specific to
            # a range that cannot be split any further, so the truncated results
            # already collected are kept.
            logger.error(
                f"The search returned {len(filings_case_nums_list)} results but there's nothing "
                "the code can do because beforedate and afterdate are the same.\n"
                "Case details will be scraped for these results.\n"
            )
        else:
            first_half = fetch_filings(afterdate, end_of_first_range, case_num_prefix)
            second_half = fetch_filings(start_of_second_range, beforedate, case_num_prefix)
            filings_case_nums_list = first_half + second_half

    # some logging to make sure results look good - could remove
    logger.info(f"Found {len(filings_case_nums_list)} case numbers.")
    if len(filings_case_nums_list) > 5:
        logger.info(
            f"Results preview: {filings_case_nums_list[0]}, {filings_case_nums_list[1]}, "
            f"..., {filings_case_nums_list[-1]}\n"
        )
    else:
        logger.info(f"Results: {', '.join(filings_case_nums_list)}\n")

    return filings_case_nums_list
 def test_fetch_filings_page(self):
     """The June 2020 filings query for prefix J1-CV-20* includes case J1-CV-20-001773."""
     expected_case_num = "J1-CV-20-001773"
     query_result = fetch_page.query_filings(
         afterdate="6-1-2020",
         beforedate="6-30-2020",
         case_num_prefix="J1-CV-20*",
     )
     assert expected_case_num in query_result