import logging
from typing import List

from bs4 import BeautifulSoup

import fetch_page  # project module that submits the actual search queries

logger = logging.getLogger(__name__)


def fetch_filings(afterdate: str, beforedate: str, case_num_prefix: str) -> List[str]:
    """Get filing case numbers between afterdate and beforedate and starting with case_num_prefix."""

    logger.info(
        f"Scraping case numbers between {afterdate} and {beforedate} "
        f"for prefix {case_num_prefix}..."
    )

    for tries in range(1, 11):
        try:
            filings_page_content = fetch_page.query_filings(
                afterdate, beforedate, case_num_prefix
            )
            filings_soup = BeautifulSoup(filings_page_content, "html.parser")
            filings_case_nums_list, query_needs_splitting = get_filing_case_nums(filings_soup)
            break
        except Exception:
            if tries == 10:
                logger.error("Failed to find case numbers on all 10 attempts.")
                # re-raise so the caller sees the failure instead of an
                # UnboundLocalError on the variables below
                raise

    # handle case of too many results (200 results means that the search cut off)
    if query_needs_splitting:
        try:
            end_of_first_range, start_of_second_range = split_date_range(
                afterdate, beforedate
            )
            filings_case_nums_list = fetch_filings(
                afterdate, end_of_first_range, case_num_prefix
            ) + fetch_filings(start_of_second_range, beforedate, case_num_prefix)
        except ValueError:
            logger.error(
                f"The search returned {len(filings_case_nums_list)} results, but the range "
                "cannot be split further because beforedate and afterdate are the same.\n"
                "Case details will be scraped for these results.\n"
            )

    # some logging to make sure results look good - could remove
    logger.info(f"Found {len(filings_case_nums_list)} case numbers.")
    if len(filings_case_nums_list) > 5:
        logger.info(
            f"Results preview: {filings_case_nums_list[0]}, {filings_case_nums_list[1]}, "
            f"..., {filings_case_nums_list[-1]}\n"
        )
    else:
        logger.info(f"Results: {', '.join(filings_case_nums_list)}\n")

    return filings_case_nums_list
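# The recursive split above depends on a split_date_range helper defined elsewhere
# in the project; the ValueError branch implies it refuses to split a single-day
# range. Below is a minimal sketch of such a helper (named with a _sketch suffix
# because it is illustrative, not the project's implementation), assuming the
# M-D-YYYY date strings used by the scraper.
from datetime import datetime, timedelta
from typing import Tuple


def split_date_range_sketch(afterdate: str, beforedate: str) -> Tuple[str, str]:
    """Split the date range roughly in half, returning the end of the first half
    and the start of the second half. Raises ValueError for a single-day range."""
    date_format = "%m-%d-%Y"  # assumed format; zero-padding differences are ignored here
    start = datetime.strptime(afterdate, date_format)
    end = datetime.strptime(beforedate, date_format)
    if start >= end:
        raise ValueError("Cannot split a date range that covers a single day.")
    midpoint = start + (end - start) / 2
    end_of_first_range = midpoint.strftime(date_format)
    start_of_second_range = (midpoint + timedelta(days=1)).strftime(date_format)
    return end_of_first_range, start_of_second_range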
def test_fetch_filings_page(self):
    fetched = fetch_page.query_filings(
        afterdate="6-1-2020", beforedate="6-30-2020", case_num_prefix="J1-CV-20*"
    )
    assert "J1-CV-20-001773" in fetched
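# A hypothetical companion check, one layer up the stack: where the test above
# asserts that the raw results page contains a known case number, this sketch
# asserts that fetch_filings parses that same case number out of the page. It
# assumes fetch_filings is importable into this test module and, like the test
# above, hits the live search site, so results can drift over time.
def test_fetch_filings_sketch(self):
    case_nums = fetch_filings(
        afterdate="6-1-2020", beforedate="6-30-2020", case_num_prefix="J1-CV-20*"
    )
    assert "J1-CV-20-001773" in case_nums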