def parse_settings_on_cloud(afterdate: str, beforedate: str, write_to_sheets=True):
    """
    Scrape and persist every setting between `afterdate` and `beforedate`.

    Same as `parse_settings()` but without the command line interface and
    without the showbrowser option.

    Each pulled setting is handed to `persist.rest_setting`. When
    `write_to_sheets` is True the pulled settings are additionally filtered
    and written to the sheet opened with the names
    "Court_scraper_eviction_scheduler" / "eviction_scheduler".

    Args:
        afterdate: Start of the date range, passed to `get_days_between_dates`.
        beforedate: End of the date range, passed to `get_days_between_dates`.
        write_to_sheets: Skip the sheet export entirely when False.

    Returns:
        None.
    """
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    pulled_settings = make_setting_list(
        get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    )
    for pulled in pulled_settings:
        persist.rest_setting(pulled)

    # maybe make this cleaner in sql? future work
    if not write_to_sheets:
        return

    # The sheet is opened before the data frame is built, matching the order
    # in which the arguments of the write call are evaluated.
    target_sheet = gsheet.open_sheet(
        gsheet.init_sheets(),
        "Court_scraper_eviction_scheduler",
        "eviction_scheduler",
    )

    # NOTE(review): `filter_df` presumably keeps the rows whose column matches
    # the given pattern -- confirm against the gsheet module.
    evictions = gsheet.filter_df(
        pd.DataFrame(pulled_settings), 'setting_type', 'Eviction'
    )
    hearings_and_trials = gsheet.filter_df(
        evictions, 'hearing_type', '(Hearing)|(Trial)'
    )
    with_case_details = gsheet.combine_cols(
        hearings_and_trials, ['case_number', 'setting_style'], 'case_dets'
    )
    # Only the last row seen for each case number is exported.
    one_row_per_case = with_case_details.drop_duplicates("case_number", keep="last")

    gsheet.write_data(target_sheet, gsheet.morning_afternoon(one_row_per_case))
def parse_and_persist_settings(afterdate: str, beforedate: str, outfile: str, showbrowser=False):
    """
    Scrape, persist and export the settings between `afterdate` and `beforedate`.

    Thin wrapper around `parse_settings()`, which already persists every
    pulled setting through `persist.rest_setting`, writes the filtered
    settings to the scheduler sheet and dumps the full list to `outfile` as
    JSON.

    Args:
        afterdate: Start of the date range to scrape.
        beforedate: End of the date range to scrape.
        outfile: Destination for the JSON dump. NOTE(review): despite the
            `str` annotation, `parse_settings()` passes this value straight
            to `json.dump`, which needs a writable text file object --
            confirm what callers actually pass.
        showbrowser: Forwarded to `parse_settings()`; when True it swaps in a
            visible Chrome webdriver.

    Returns:
        None.
    """
    # `parse_settings()` has no return value, so there is nothing to iterate
    # over here: looping over its result raises
    # `TypeError: 'NoneType' object is not iterable`. It has also already
    # persisted, exported and dumped the settings, so repeating those steps
    # would duplicate database rows, sheet writes and the JSON document.
    parse_settings(afterdate, beforedate, outfile, showbrowser)
def parse_settings(afterdate, beforedate, outfile, showbrowser=False):
    """
    Get data for all settings between `afterdate` and `beforedate`.

    Every pulled setting is persisted through `persist.rest_setting`, the
    filtered settings are written to the sheet opened with the names
    "Court_scraper_eviction_scheduler" / "eviction_scheduler", and the complete
    list of pulled settings is dumped to `outfile` as JSON.

    Args:
        afterdate: Start of the date range, passed to `get_days_between_dates`.
        beforedate: End of the date range, passed to `get_days_between_dates`.
        outfile: Writable file object handed to `json.dump`.
        showbrowser: When True, replace `fetch_page.driver` with a Chrome
            webdriver created from "./chromedriver".

    Returns:
        None.
    """
    if showbrowser:
        # Imported lazily so selenium's webdriver is only needed when a
        # visible browser is requested.
        from selenium import webdriver

        fetch_page.driver = webdriver.Chrome("./chromedriver")

    pulled_settings = make_setting_list(
        get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    )
    for pulled in pulled_settings:
        persist.rest_setting(pulled)

    # The sheet is opened before the data frame is built, matching the order
    # in which the arguments of the write call are evaluated.
    target_sheet = gsheet.open_sheet(
        gsheet.init_sheets(),
        "Court_scraper_eviction_scheduler",
        "eviction_scheduler",
    )

    # NOTE(review): `filter_df` presumably keeps the rows whose column matches
    # the given pattern -- confirm against the gsheet module.
    evictions = gsheet.filter_df(
        pd.DataFrame(pulled_settings), 'setting_type', 'Eviction'
    )
    hearings_and_trials = gsheet.filter_df(
        evictions, 'hearing_type', '(Hearing)|(Trial)'
    )
    with_case_details = gsheet.combine_cols(
        hearings_and_trials, ['case_number', 'setting_style'], 'case_dets'
    )
    # Only the last row seen for each case number is exported.
    one_row_per_case = with_case_details.drop_duplicates("case_number", keep="last")

    gsheet.write_data(target_sheet, gsheet.morning_afternoon(one_row_per_case))

    json.dump(pulled_settings, outfile)