Exemple #1
0
def parse_settings_on_cloud(afterdate: str,
                            beforedate: str,
                            write_to_sheets=True):
    """
    Scrape, persist and optionally publish settings for a date range.

    Non-interactive counterpart of `parse_settings()`: there is no command
    line interface and no showbrowser option.

    Every setting produced by `make_setting_list()` for the days between
    `afterdate` and `beforedate` is handed to `persist.rest_setting()`.
    When `write_to_sheets` is True, a filtered view of those settings is
    additionally written to the sheet opened with the names
    "Court_scraper_eviction_scheduler" / "eviction_scheduler".

    Returns nothing.
    """

    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    days = get_days_between_dates(afterdate=afterdate,
                                  beforedate=beforedate)
    pulled_settings = make_setting_list(days)
    for record in pulled_settings:
        persist.rest_setting(record)

    if not write_to_sheets:
        return

    # TODO: maybe do this filtering in SQL instead (future work).
    # The sheet is opened before the DataFrame is built, matching the
    # evaluation order of the original nested call.
    sheet = gsheet.open_sheet(gsheet.init_sheets(),
                              "Court_scraper_eviction_scheduler",
                              "eviction_scheduler")

    frame = pd.DataFrame(pulled_settings)
    # Keep rows whose setting_type matches 'Eviction', then those whose
    # hearing_type matches the Hearing/Trial pattern.
    evictions = gsheet.filter_df(frame, 'setting_type', 'Eviction')
    hearings = gsheet.filter_df(evictions, 'hearing_type', '(Hearing)|(Trial)')
    combined = gsheet.combine_cols(hearings,
                                   ['case_number', 'setting_style'],
                                   'case_dets')
    # One row per case number; the last occurrence wins.
    deduped = combined.drop_duplicates("case_number", keep="last")

    gsheet.write_data(sheet, gsheet.morning_afternoon(deduped))
def parse_and_persist_settings(afterdate: str,
                               beforedate: str,
                               outfile: str,
                               showbrowser=False):
    """
    Scrape settings, persist them, publish them to the sheet and dump JSON.

    Args:
        afterdate: date-range bound, forwarded unchanged to `parse_settings()`.
        beforedate: date-range bound, forwarded unchanged to `parse_settings()`.
        outfile: destination of the JSON dump. Either a filesystem path
            (``str``), which is opened for writing here, or an already-open
            text file object with a ``write()`` method, which is used as is
            and left open for the caller to close.
        showbrowser: forwarded unchanged to `parse_settings()`.

    Returns nothing.
    """
    # NOTE(review): this relies on parse_settings() returning the scraped
    # settings as an iterable of JSON-serialisable records - confirm against
    # the parse_settings() actually in scope.
    pulled_settings = parse_settings(afterdate, beforedate, outfile,
                                     showbrowser)
    for setting in pulled_settings:
        persist.rest_setting(setting)

    # The sheet is opened before the DataFrame is built, matching the
    # evaluation order of the original nested call.
    sheet = gsheet.open_sheet(gsheet.init_sheets(),
                              "Court_scraper_eviction_scheduler",
                              "eviction_scheduler")

    frame = pd.DataFrame(pulled_settings)
    evictions = gsheet.filter_df(frame, 'setting_type', 'Eviction')
    hearings = gsheet.filter_df(evictions, 'hearing_type', '(Hearing)|(Trial)')
    combined = gsheet.combine_cols(hearings,
                                   ['case_number', 'setting_style'],
                                   'case_dets')
    # One row per case number; the last occurrence wins.
    deduped = combined.drop_duplicates("case_number", keep="last")
    gsheet.write_data(sheet, gsheet.morning_afternoon(deduped))

    # json.dump() needs an object with .write(); a plain path string (which
    # is what the annotation advertises) previously failed with
    # AttributeError after all the side effects above had already run.
    if isinstance(outfile, str):
        with open(outfile, "w", encoding="utf-8") as handle:
            json.dump(pulled_settings, handle)
    else:
        json.dump(pulled_settings, outfile)
Exemple #3
0
def parse_settings(afterdate, beforedate, outfile, showbrowser=False):
    """Scrape all settings between `afterdate` and `beforedate`, then store and publish them.

    Each scraped setting is passed to `persist.rest_setting()` (the database
    store, PostgreSQL according to the original documentation). A filtered
    view is written to the sheet opened with the names
    "Court_scraper_eviction_scheduler" / "eviction_scheduler", and finally the
    full list is dumped as JSON to `outfile`, which is handed directly to
    `json.dump()`.

    Returns nothing.
    """

    if showbrowser:
        # Swap in a default Selenium Chrome driver for the page fetcher.
        from selenium import webdriver
        fetch_page.driver = webdriver.Chrome("./chromedriver")

    days = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    settings = make_setting_list(days)
    for record in settings:
        persist.rest_setting(record)

    # The sheet is opened before the DataFrame is built, matching the
    # evaluation order of the original one-line call.
    sheet = gsheet.open_sheet(
        gsheet.init_sheets(),
        "Court_scraper_eviction_scheduler",
        "eviction_scheduler",
    )

    frame = pd.DataFrame(settings)
    evictions = gsheet.filter_df(frame, 'setting_type', 'Eviction')
    hearings = gsheet.filter_df(evictions, 'hearing_type', '(Hearing)|(Trial)')
    combined = gsheet.combine_cols(hearings, ['case_number', 'setting_style'], 'case_dets')
    # One row per case number; the last occurrence wins.
    deduped = combined.drop_duplicates("case_number", keep="last")
    gsheet.write_data(sheet, gsheet.morning_afternoon(deduped))

    json.dump(settings, outfile)