def parse_filings_on_cloud(afterdate, beforedate, get_old_active=True):
    """Gather case numbers for a date range and pass them to the parser.

    Args:
        afterdate: One bound of the date range; forwarded unchanged to
            `get_all_case_nums`.
        beforedate: The other bound of the date range; forwarded unchanged
            to `get_all_case_nums`.
        get_old_active: When truthy, the result of
            `get_old_active_case_nums()` is concatenated onto the case
            numbers found for the date range.

    Returns:
        None. Whatever `parse_all_from_parse_filings` returns is discarded.
    """
    logger.info(f"Parsing filings between {afterdate} and {beforedate}.")

    case_numbers = get_all_case_nums(afterdate, beforedate)
    if get_old_active:
        # Build a new sequence rather than extending the fetched one in place.
        case_numbers = case_numbers + get_old_active_case_nums()

    logger.info(f"Found {len(case_numbers)} case numbers.")
    parse_all_from_parse_filings(case_numbers)
Example #2
0
def parse_filings_on_cloud(
    afterdate: datetime.date,
    beforedate: datetime.date,
    get_old_active=True,
    showbrowser=False,
    scraper: Optional[scrapers.FakeScraper] = None,
):
    """Parses filings without command line interface and outfile options.

    Args:
        afterdate: One bound of the date range, passed to
            `scraper.get_all_case_nums` as `afterdate`.
        beforedate: The other bound of the date range, passed to
            `scraper.get_all_case_nums` as `beforedate`.
        get_old_active: When truthy, the case numbers returned by
            `persist.get_old_active_case_nums()` are added as well.
        showbrowser: Only used when no scraper is supplied; the default
            `scrapers.TravisScraper` is then created with
            `headless=not showbrowser`.
        scraper: Scraper to use. When `None`, a `scrapers.TravisScraper`
            is created.

    Returns:
        The value returned by `parse_all_from_parse_filings` for the
        de-duplicated case numbers.
    """

    logger.info(f"Parsing filings between {afterdate} and {beforedate}.")

    if scraper is None:
        scraper = scrapers.TravisScraper(headless=not showbrowser)

    # Copy into a fresh list: extending the scraper's own return value in
    # place could leak the extra case numbers back into the scraper if it
    # keeps a reference to that list.
    all_case_nums = list(
        scraper.get_all_case_nums(afterdate=afterdate, beforedate=beforedate)
    )
    if get_old_active:
        # Kept as a function-level import, as in the original
        # (presumably so `persist` is only loaded when needed - confirm).
        from persist import get_old_active_case_nums

        all_case_nums.extend(get_old_active_case_nums())

    # using dict to eliminate duplicates while keeping first-seen order
    all_case_nums = list(dict.fromkeys(all_case_nums))
    logger.info(
        f"Found {len(all_case_nums)} case numbers (including old active ones)."
    )
    cases = parse_all_from_parse_filings(all_case_nums, scraper=scraper)

    # persist cases only if not using the test scraper
    if isinstance(scraper, scrapers.TravisScraper):
        persist_parsed_cases(cases)

    return cases
Example #3
0
def parse_filings_on_cloud(afterdate: str,
                           beforedate: str,
                           get_old_active=True):
    """Look up case numbers for a date range and run the parser on them.

    Takes no outfile or showbrowser options: nothing is written to a file
    and nothing is returned.

    Args:
        afterdate: One bound of the date range; forwarded unchanged to
            `get_all_case_nums`.
        beforedate: The other bound of the date range; forwarded unchanged
            to `get_all_case_nums`.
        get_old_active: When truthy, the result of
            `get_old_active_case_nums()` is concatenated onto the case
            numbers found for the date range.
    """
    logger.info(f"Parsing filings between {afterdate} and {beforedate}.")

    filed_in_range = get_all_case_nums(afterdate, beforedate)
    all_case_nums = (
        filed_in_range + get_old_active_case_nums()
        if get_old_active
        else filed_in_range
    )

    case_count = len(all_case_nums)
    logger.info(
        f"Found {case_count} case numbers (including old active ones)."
    )
    parse_all_from_parse_filings(all_case_nums)
Example #4
0
 def test_parse_cases_from_cli(self):
     # Parse a single known case number through the fake scraper and
     # spot-check two fields of the first parsed case.
     parsed = parse_hearings.parse_all_from_parse_filings(
         case_nums=["J1-CV-20-001590"],
         showbrowser=False,
         scraper=FAKE_SCRAPER,
     )
     first_case = parsed[0]
     assert first_case["register_url"].endswith("CaseID=2286743")
     first_hearing = first_case["hearings"][0]
     assert first_hearing["hearing_type"] == "Eviction Hearing"
def parse_filings(afterdate, beforedate, outfile, showbrowser=False):
    """Parse all cases for a date range plus old active cases and dump them as JSON.

    Args:
        afterdate: One bound of the date range; forwarded unchanged to
            `get_all_case_nums`.
        beforedate: The other bound of the date range; forwarded unchanged
            to `get_all_case_nums`.
        outfile: Writable text file object that receives the JSON output.
        showbrowser: When truthy, `fetch_page.driver` is replaced with a
            Chrome webdriver started from `./chromedriver`, and the flag is
            forwarded to `parse_all_from_parse_filings`.

    Returns:
        None. A failure while writing the JSON is logged, not raised.
    """
    # Use a Chrome browser window (rather than headless) if showbrowser is True.
    if showbrowser:
        fetch_page.driver = webdriver.Chrome("./chromedriver")

    all_case_nums = get_all_case_nums(afterdate,
                                      beforedate) + get_old_active_case_nums()
    parsed_cases = parse_all_from_parse_filings(all_case_nums,
                                                showbrowser=showbrowser)

    try:
        json.dump(parsed_cases, outfile)
    except Exception:
        # Writing the output stays best-effort, but a bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit and hide the cause.
        # `logger.exception` records the traceback alongside the message.
        logger.exception("Creating the json file may have been unsuccessful.")
Example #6
0
def parse_filings(afterdate, beforedate, outfile, showbrowser=False):
    """
    Performs a full 'scraper run' between `afterdate` and `beforedate`.

    Collects the case numbers found between `afterdate` and `beforedate`
    (example of date format: 9-1-2020), adds the case numbers returned by
    `get_old_active_case_nums()`, parses all of them with
    `parse_all_from_parse_filings`, and writes the parsed cases to
    `outfile` as JSON.

    NOTE(review): the earlier docstring said this also updates rows in the
    event/disposition/case_detail tables that are still active; any such
    persistence would happen inside the helpers called here - confirm.

    Args:
        afterdate: One bound of the date range; forwarded unchanged to
            `get_all_case_nums`.
        beforedate: The other bound of the date range; forwarded unchanged
            to `get_all_case_nums`.
        outfile: Writable text file object that receives the JSON output.
        showbrowser: When truthy, `fetch_page.driver` is replaced with a
            Chrome webdriver started from `./chromedriver`, and the flag is
            forwarded to `parse_all_from_parse_filings`.

    Returns:
        None. A failure while writing the JSON is logged, not raised.
    """

    # Use a Chrome browser window (rather than headless) if showbrowser is True.
    if showbrowser:
        fetch_page.driver = webdriver.Chrome("./chromedriver")

    all_case_nums = get_all_case_nums(afterdate,
                                      beforedate) + get_old_active_case_nums()
    parsed_cases = parse_all_from_parse_filings(all_case_nums,
                                                showbrowser=showbrowser)

    try:
        json.dump(parsed_cases, outfile)
    except Exception:
        # Writing the output stays best-effort, but a bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit and hide the cause.
        # `logger.exception` records the traceback alongside the message.
        logger.exception("Creating the json file may have been unsuccessful.")