def parse_settings_on_cloud(
    afterdate: str,
    beforedate: str,
    write_to_sheets=True,
    scraper: Optional[scrapers.FakeScraper] = None,
):
    """
    Same as `parse_settings()` (see below) but without command line interface and showbrowser option.
    Outputs scraped results to a gsheet:Settings_scheduler if `write_to_sheets` is True
    """
    if scraper is None:
        # Run headless by default: this entry point has no showbrowser option.
        scraper = scrapers.TravisScraper(headless=True)
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    days_to_pull = get_days_between_dates(afterdate=afterdate,
                                          beforedate=beforedate)
    pulled_settings = scraper.make_setting_list(days_to_pull)
    if isinstance(scraper, scrapers.TravisScraper):
        import persist

        for setting in pulled_settings:
            persist.rest_setting(setting)
    # maybe make this cleaner in sql? future work
    if write_to_sheets:
        import gsheet

        gsheet.write_pulled_settings(pulled_settings)
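

# A minimal usage sketch (not part of the original module): call the cloud settings parser in a
# dry-run configuration. With write_to_sheets=False the Google Sheets export is skipped, and,
# assuming scrapers.FakeScraper is not a TravisScraper subclass and takes no constructor
# arguments, the Postgres persist step is skipped as well. The ISO date strings are an
# assumption; use whatever date format the project expects.
def example_settings_dry_run():
    parse_settings_on_cloud(
        afterdate="2020-09-01",
        beforedate="2020-09-02",
        write_to_sheets=False,
        scraper=scrapers.FakeScraper(),
    )
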
def parse_all_from_parse_filings(
    case_nums: List[str],
    scraper: Optional[scrapers.FakeScraper] = None,
    db: bool = True,
    county: str = "travis",
    showbrowser: bool = False,
) -> List[Dict[str, Any]]:
    """
    Gets case details for each case number in `case_nums` and sends the data to PostgreSQL.
    Logs any case numbers for which getting data failed.
    """
    if not scraper:
        # Get the scraper corresponding to the lowercase command line entry for county. Default to TravisScraper.
        county = county.lower()
        scraper = (
            scrapers.SCRAPER_NAMES[county]()
            if county in scrapers.SCRAPER_NAMES
            else scrapers.TravisScraper()
        )
    parsed_cases = []
    for tries in range(1, 6):
        try:
            parsed_cases = scraper.make_case_list(ids_to_parse=case_nums)
            return parsed_cases
        except Exception as e:
            logger.error(
                f"Failed to parse hearings on attempt {tries}. Error message: {e}"
            )
    return parsed_cases
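

# A usage sketch (the helper name is hypothetical): fetch case details for one known case
# number, relying on the county lookup above to select the scraper. parse_all_from_parse_filings
# retries the scrape up to five times and returns whatever parsed successfully.
def example_parse_single_case():
    return parse_all_from_parse_filings(
        case_nums=["J1-CV-20-001773"],
        county="travis",
    )
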
Example #3
def parse_filings_on_cloud(
    afterdate: datetime.date,
    beforedate: datetime.date,
    get_old_active=True,
    showbrowser=False,
    scraper: Optional[scrapers.FakeScraper] = None,
):
    """Parses filings without command line interface and outfile options."""

    logger.info(f"Parsing filings between {afterdate} and {beforedate}.")

    if not scraper:
        scraper = scrapers.TravisScraper(headless=not showbrowser)

    all_case_nums = scraper.get_all_case_nums(afterdate=afterdate,
                                              beforedate=beforedate)
    if get_old_active:
        from persist import get_old_active_case_nums

        all_case_nums += get_old_active_case_nums()

    # using dict to eliminate duplicates
    all_case_nums = list(dict.fromkeys(all_case_nums))
    logger.info(
        f"Found {len(all_case_nums)} case numbers (including old active ones)."
    )
    cases = parse_all_from_parse_filings(all_case_nums, scraper=scraper)

    # persist cases only if not using the test scraper
    if isinstance(scraper, scrapers.TravisScraper):
        persist_parsed_cases(cases)

    return cases
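
# Note on the de-duplication step above: dict.fromkeys() keeps only the first occurrence of
# each case number and preserves insertion order (guaranteed for dicts on Python 3.7+), e.g.
#     list(dict.fromkeys(["A", "B", "A", "C"]))  # -> ["A", "B", "C"]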
Example #4
def test_fetch_settings(self):
    scraper = scrapers.TravisScraper()
    settings = scraper.make_setting_list(days_to_pull=[date(2020, 9, 1)])
    assert any(case["case_number"] == "J1-CV-19-005480" for case in settings)
Example #5
from datetime import date

import pytest

import scrapers
import config

config.county = "travis"

scraper = scrapers.TravisScraper()
williamson_scraper = scrapers.WilliamsonScraper()


class TestFetchFilingsPage:
    def test_fetch_filings_page(self):
        fetched = scraper.query_filings(
            afterdate=date(2020, 6, 1),
            beforedate=date(2020, 6, 30),
            case_num_prefix="J1-CV-20*",
        )
        assert "J1-CV-20-001773" in fetched


class TestFetchCaseNumbers:
    def test_fetch_case_numbers_requiring_split(self):
        """
        Test date range requiring multiple pages of search results.

        The scraper will need to split this into multiple queries and combine the results.
        """