def _parse_and_persist_settings(
    afterdate: dt.date,
    beforedate: dt.date,
    outfile: str = "",
    showbrowser: bool = False,
    db: bool = True,
    gs: bool = True,
    county: str = "travis",
):
    """Scrape settings via `parse_settings()` and fan the result out to the enabled sinks.

    Args:
        afterdate: Start of the date range, forwarded to `parse_settings()`.
        beforedate: End of the date range, forwarded to `parse_settings()`.
        outfile: Where to dump the settings as JSON. May be a filesystem path
            (opened for writing here) or an already-open writable text stream.
            A falsy value (the default ``""``) skips the JSON dump.
        showbrowser: Forwarded unchanged to `parse_settings()`.
        db: When true, every setting is passed to `persist.rest_setting()`.
        gs: When true, the settings are passed to
            `gsheet.write_pulled_settings()`.
        county: Forwarded to `parse_settings()` as the `county` keyword.

    Returns:
        Whatever `parse_settings()` returned.
    """
    pulled_settings = parse_settings(
        afterdate, beforedate, outfile, showbrowser, county=county
    )

    if db:
        # Imported only when the database sink is actually requested.
        import persist

        for setting in pulled_settings:
            persist.rest_setting(setting)

    if gs:
        # Imported only when the Google Sheets sink is actually requested.
        import gsheet

        gsheet.write_pulled_settings(pulled_settings)

    if outfile:
        if isinstance(outfile, str):
            # json.dump() needs an object with .write(); a bare path string
            # would raise AttributeError, so open the path ourselves.
            with open(outfile, "w") as handle:
                json.dump(pulled_settings, handle)
        else:
            # Already a file-like object (e.g. a stream supplied by the caller).
            json.dump(pulled_settings, outfile)

    return pulled_settings
def parse_settings_on_cloud(afterdate: str, beforedate: str, write_to_sheets=True):
    """
    Scrape settings for the given date range, persist each one, and optionally
    publish a filtered view to the "eviction_scheduler" tab of the
    "Court_scraper_eviction_scheduler" sheet.

    The sheet is only touched when `write_to_sheets` is truthy.
    """
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    hearing_days = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    scraped = make_setting_list(hearing_days)

    for record in scraped:
        persist.rest_setting(record)

    # maybe make this cleaner in sql? future work
    if not write_to_sheets:
        return

    # The worksheet is opened before the frame is built, matching the
    # left-to-right argument evaluation of a single nested call.
    worksheet = gsheet.open_sheet(
        gsheet.init_sheets(),
        "Court_scraper_eviction_scheduler",
        "eviction_scheduler",
    )

    # Narrow by setting_type first, then by hearing_type.
    evictions = gsheet.filter_df(pd.DataFrame(scraped), "setting_type", "Eviction")
    hearings = gsheet.filter_df(evictions, "hearing_type", "(Hearing)|(Trial)")

    # Combine case_number and setting_style into 'case_dets', then keep only
    # the last row per case_number.
    combined = gsheet.combine_cols(hearings, ["case_number", "setting_style"], "case_dets")
    deduped = combined.drop_duplicates("case_number", keep="last")

    gsheet.write_data(worksheet, gsheet.morning_afternoon(deduped))
def parse_settings_on_cloud(
    afterdate: str,
    beforedate: str,
    write_to_sheets=True,
    scraper: Optional[scrapers.FakeScraper] = None,
):
    """
    Scrape settings for a date range without the command line interface or
    the showbrowser option.

    Args:
        afterdate: Start of the date range to scrape.
        beforedate: End of the date range to scrape.
        write_to_sheets: When truthy, the scraped settings are handed to
            `gsheet.write_pulled_settings()`.
        scraper: Scraper used to build the setting list. Defaults to a new
            `scrapers.TravisScraper()` when omitted.

    Settings are persisted with `persist.rest_setting()` only when the scraper
    is a `scrapers.TravisScraper`; any other scraper skips the database write.
    """
    if scraper is None:
        # NOTE(review): an unreachable duplicate of this guard further down
        # built the scraper with `headless=True`; it has been removed as dead
        # code. Confirm whether the default scraper should be headless.
        scraper = scrapers.TravisScraper()

    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    days_to_pull = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    pulled_settings = scraper.make_setting_list(days_to_pull)

    if isinstance(scraper, scrapers.TravisScraper):
        # Imported only when a database write is actually going to happen.
        import persist

        for setting in pulled_settings:
            persist.rest_setting(setting)

    # maybe make this cleaner in sql? future work
    if write_to_sheets:
        # Imported only when the sheet export is requested.
        import gsheet

        gsheet.write_pulled_settings(pulled_settings)
def parse_settings(afterdate, beforedate, outfile, showbrowser=False):
    """Scrape settings between `afterdate` and `beforedate`, persist each one
    with `persist.rest_setting()`, and dump the full list as JSON to `outfile`.

    `outfile` is passed straight to `json.dump()`, so it must be a writable
    file-like object. When `showbrowser` is truthy, `fetch_page.driver` is
    replaced with a default Firefox selenium driver before scraping.
    """
    if showbrowser:
        # Swap in the default (visible) selenium driver.
        from selenium import webdriver

        fetch_page.driver = webdriver.Firefox()

    hearing_days = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    scraped = make_setting_list(hearing_days)

    for record in scraped:
        persist.rest_setting(record)

    json.dump(scraped, outfile)
def parse_settings(afterdate, beforedate, outfile, showbrowser=False):
    """Collect every setting between `afterdate` and `beforedate`, persist each
    one with `persist.rest_setting()`, publish a filtered view to the
    "eviction_scheduler" tab of the "Court_scraper_eviction_scheduler" sheet,
    and dump the full list as JSON to `outfile`.

    `outfile` is passed straight to `json.dump()`, so it must be a writable
    file-like object.
    """
    if showbrowser:
        # Use a visible Chrome driver located at ./chromedriver.
        from selenium import webdriver

        fetch_page.driver = webdriver.Chrome("./chromedriver")

    hearing_days = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    scraped = make_setting_list(hearing_days)

    for record in scraped:
        persist.rest_setting(record)

    # The worksheet is opened before the frame is built, matching the
    # left-to-right argument evaluation of a single nested call.
    worksheet = gsheet.open_sheet(
        gsheet.init_sheets(),
        "Court_scraper_eviction_scheduler",
        "eviction_scheduler",
    )

    # Narrow by setting_type first, then by hearing_type.
    evictions = gsheet.filter_df(pd.DataFrame(scraped), "setting_type", "Eviction")
    hearings = gsheet.filter_df(evictions, "hearing_type", "(Hearing)|(Trial)")

    # Combine case_number and setting_style into 'case_dets', then keep only
    # the last row per case_number.
    combined = gsheet.combine_cols(hearings, ["case_number", "setting_style"], "case_dets")
    deduped = combined.drop_duplicates("case_number", keep="last")

    gsheet.write_data(worksheet, gsheet.morning_afternoon(deduped))

    json.dump(scraped, outfile)
def parse_settings_on_cloud(afterdate, beforedate):
    """Scrape settings between `afterdate` and `beforedate`, persist each one
    with `persist.rest_setting()`, and write a filtered view to the
    "eviction_scheduler" tab of the "Court_scraper_eviction_scheduler" sheet.
    """
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    hearing_days = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    scraped = make_setting_list(hearing_days)

    for record in scraped:
        persist.rest_setting(record)

    # The worksheet is opened before the frame is built, matching the
    # left-to-right argument evaluation of a single nested call.
    worksheet = gsheet.open_sheet(
        gsheet.init_sheets(),
        "Court_scraper_eviction_scheduler",
        "eviction_scheduler",
    )

    # Narrow by setting_type first, then by hearing_type.
    evictions = gsheet.filter_df(pd.DataFrame(scraped), "setting_type", "Eviction")
    hearings = gsheet.filter_df(evictions, "hearing_type", "(Hearing)|(Trial)")

    # Combine case_number and setting_style into the 'case_dets' column.
    sheet_rows = gsheet.combine_cols(hearings, ["case_number", "setting_style"], "case_dets")

    gsheet.write_data(worksheet, sheet_rows)
def parse_and_persist_settings(afterdate: str, beforedate: str, outfile: str, showbrowser=False):
    """Scrape settings via `parse_settings()`, persist them, publish them to
    the scheduler sheet, and dump them as JSON.

    Args:
        afterdate: Start of the date range, forwarded to `parse_settings()`.
        beforedate: End of the date range, forwarded to `parse_settings()`.
        outfile: Destination for the JSON dump. May be a filesystem path
            (opened for writing here) or an already-open writable text stream.
        showbrowser: Forwarded unchanged to `parse_settings()`.

    Each setting is passed to `persist.rest_setting()`. A filtered view is then
    written to the "eviction_scheduler" tab of the
    "Court_scraper_eviction_scheduler" sheet.
    """
    pulled_settings = parse_settings(afterdate, beforedate, outfile, showbrowser)

    for setting in pulled_settings:
        persist.rest_setting(setting)

    # The worksheet is opened before the frame is built, matching the
    # left-to-right argument evaluation of a single nested call.
    worksheet = gsheet.open_sheet(
        gsheet.init_sheets(),
        "Court_scraper_eviction_scheduler",
        "eviction_scheduler",
    )

    # Narrow by setting_type first, then by hearing_type.
    evictions = gsheet.filter_df(pd.DataFrame(pulled_settings), "setting_type", "Eviction")
    hearings = gsheet.filter_df(evictions, "hearing_type", "(Hearing)|(Trial)")

    # Combine case_number and setting_style into 'case_dets', then keep only
    # the last row per case_number.
    combined = gsheet.combine_cols(hearings, ["case_number", "setting_style"], "case_dets")
    deduped = combined.drop_duplicates("case_number", keep="last")

    gsheet.write_data(worksheet, gsheet.morning_afternoon(deduped))

    if isinstance(outfile, str):
        # json.dump() needs an object with .write(); a bare path string would
        # raise AttributeError, so open the path ourselves.
        with open(outfile, "w") as handle:
            json.dump(pulled_settings, handle)
    else:
        # Already a file-like object supplied by the caller.
        json.dump(pulled_settings, outfile)