def parse_filings_on_cloud(
    afterdate: datetime.date,
    beforedate: datetime.date,
    get_old_active=True,
    showbrowser=False,
    scraper: Optional[scrapers.FakeScraper] = None,
):
    """Parses filings without command line interface and outfile options."""
    logger.info(f"Parsing filings between {afterdate} and {beforedate}.")
    if not scraper:
        scraper = scrapers.TravisScraper(headless=not showbrowser)
    all_case_nums = scraper.get_all_case_nums(afterdate=afterdate, beforedate=beforedate)
    if get_old_active:
        from persist import get_old_active_case_nums

        all_case_nums += get_old_active_case_nums()

    # use a dict to eliminate duplicate case numbers while preserving order
    all_case_nums = list(dict.fromkeys(all_case_nums))
    logger.info(
        f"Found {len(all_case_nums)} case numbers (including old active ones)."
    )
    cases = parse_all_from_parse_filings(all_case_nums, scraper=scraper)

    # persist cases only if not using the test scraper
    if isinstance(scraper, scrapers.TravisScraper):
        persist_parsed_cases(cases)

    return cases
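# A minimal usage sketch (not part of the original module): it shows one way to
# invoke parse_filings_on_cloud with the test scraper so that nothing is
# persisted. The date range and the no-argument FakeScraper() construction are
# assumptions, not verified against the rest of the repo.
def _example_parse_filings_on_cloud_run():
    import datetime

    return parse_filings_on_cloud(
        afterdate=datetime.date(2020, 9, 1),
        beforedate=datetime.date(2020, 9, 30),
        get_old_active=False,
        scraper=scrapers.FakeScraper(),  # assumed to construct without arguments
    )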
def parse_filings_on_cloud(afterdate, beforedate, get_old_active=True):
    logger.info(f"Parsing filings between {afterdate} and {beforedate}.")
    if get_old_active:
        all_case_nums = get_all_case_nums(afterdate, beforedate) + get_old_active_case_nums()
    else:
        all_case_nums = get_all_case_nums(afterdate, beforedate)
    logger.info(f"Found {len(all_case_nums)} case numbers.")
    parse_all_from_parse_filings(all_case_nums)
def parse_filings(afterdate, beforedate, outfile, showbrowser=False):
    # use the default Chrome browser (rather than headless) if showbrowser is True
    if showbrowser:
        fetch_page.driver = webdriver.Chrome("./chromedriver")

    all_case_nums = get_all_case_nums(afterdate, beforedate) + get_old_active_case_nums()
    parsed_cases = parse_all_from_parse_filings(all_case_nums, showbrowser=showbrowser)

    try:
        json.dump(parsed_cases, outfile)
    except Exception:
        logger.error("Creating the JSON file may have been unsuccessful.")
def parse_filings_on_cloud(afterdate: str, beforedate: str, get_old_active=True):
    """Same as `parse_filings()` (see below) but without command line interface and showbrowser/outfile options."""
    logger.info(f"Parsing filings between {afterdate} and {beforedate}.")
    if get_old_active:
        all_case_nums = get_all_case_nums(afterdate, beforedate) + get_old_active_case_nums()
    else:
        all_case_nums = get_all_case_nums(afterdate, beforedate)
    logger.info(
        f"Found {len(all_case_nums)} case numbers (including old active ones)."
    )
    parse_all_from_parse_filings(all_case_nums)
def parse_filings(afterdate, beforedate, outfile, showbrowser=False): """ Performs a full 'scraper run' between `afterdate` and `beforedate` - gets case details, events, and dispositions for all case numbers between `afterdate` and `beforedate`. Example of date format: 9-1-2020. Also updates rows in event/disposition/case_detail table that are still active """ # use default chrome browser (rather than headless) is showbrowser is True if showbrowser: fetch_page.driver = webdriver.Chrome("./chromedriver") all_case_nums = get_all_case_nums(afterdate, beforedate) + get_old_active_case_nums() parsed_cases = parse_all_from_parse_filings(all_case_nums, showbrowser=showbrowser) try: json.dump(parsed_cases, outfile) except: logger.error("Creating the json file may have been unsuccessful.")