コード例 #1
0
def parse_settings_on_cloud(afterdate: str,
                            beforedate: str,
                            write_to_sheets=True):
    """Scrape court settings between `afterdate` and `beforedate` (cloud variant).

    Same behavior as `parse_settings()` (see below) but without the
    command-line interface and the showbrowser option. Every scraped
    setting is persisted via the REST API; when `write_to_sheets` is
    True, eviction hearings/trials are additionally written to the
    scheduler google sheet.
    """

    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    days = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    pulled_settings = make_setting_list(days)
    for entry in pulled_settings:
        persist.rest_setting(entry)

    # TODO: maybe make this cleaner in sql? future work
    if not write_to_sheets:
        return

    target_sheet = gsheet.open_sheet(gsheet.init_sheets(),
                                     "Court_scraper_eviction_scheduler",
                                     "eviction_scheduler")
    # Keep only eviction hearings/trials, combine the case columns,
    # dedupe on case number (last wins), then split morning/afternoon.
    df = pd.DataFrame(pulled_settings)
    df = gsheet.filter_df(df, 'setting_type', 'Eviction')
    df = gsheet.filter_df(df, 'hearing_type', '(Hearing)|(Trial)')
    df = gsheet.combine_cols(df, ['case_number', 'setting_style'], 'case_dets')
    df = df.drop_duplicates("case_number", keep="last")
    gsheet.write_data(target_sheet, gsheet.morning_afternoon(df))
コード例 #2
0
def read_data(creds, ws, gs):
    """Read court tracking data from a google sheet.

    Args:
        creds: google-API credentials forwarded to `gsheet.init_sheets`.
        ws: name passed as the second argument to `gsheet.open_sheet`
            (presumably the spreadsheet name — TODO confirm).
        gs: name passed as the third argument to `gsheet.open_sheet`
            (presumably the worksheet name — TODO confirm).

    Returns:
        pandas.DataFrame with the raw sheet contents. Duplicate
        "Case Number" rows are NOT dropped (see TODO below).
    """
    df = gsheet.read_data(gsheet.open_sheet(gsheet.init_sheets(creds), ws, gs))
    # TODO: decide whether to drop duplicate case numbers, e.g.
    # df.drop_duplicates("Case Number", inplace=True)
    return df
コード例 #3
0
def read_data(creds,
              sheet_name="01_Community_lawyer_test_out_final",
              worksheet_name="Frontend"):
    """Read court tracking data from a google sheet.

    Args:
        creds: google-API credentials forwarded to `gsheet.init_sheets`.
        sheet_name: spreadsheet to open; defaults to the previously
            hard-coded "01_Community_lawyer_test_out_final".
        worksheet_name: worksheet within the spreadsheet; defaults to
            the previously hard-coded "Frontend".

    Returns:
        pandas.DataFrame with the raw sheet contents. Duplicate
        "Case Number" rows are NOT dropped (see TODO below).
    """
    df = gsheet.read_data(
        gsheet.open_sheet(gsheet.init_sheets(creds), sheet_name,
                          worksheet_name))
    # TODO: decide whether to drop duplicate case numbers, e.g.
    # df.drop_duplicates("Case Number", inplace=True)
    return df
コード例 #4
0
def parse_settings(afterdate, beforedate, outfile, showbrowser=False):
    """Scrape all settings between `afterdate` and `beforedate`.

    Each setting is persisted via the REST API, eviction hearings/trials
    are written to the scheduler google sheet, and the raw results are
    JSON-dumped to `outfile`.

    Args:
        afterdate: start of the date range to pull.
        beforedate: end of the date range to pull.
        outfile: writable file-like object for the JSON dump
            (annotated as `str` elsewhere — TODO confirm).
        showbrowser: when True, use a visible Chrome webdriver instead
            of the module's default driver.

    Returns:
        The list of scraped settings. (Bug fix: this previously
        returned None, but `parse_and_persist_settings` iterates over
        this function's return value.)
    """
    # If showbrowser is True, use the default selenium driver
    if showbrowser:
        from selenium import webdriver
        fetch_page.driver = webdriver.Chrome("./chromedriver")

    days_to_pull = get_days_between_dates(afterdate=afterdate,
                                          beforedate=beforedate)
    pulled_settings = make_setting_list(days_to_pull)
    for setting in pulled_settings:
        persist.rest_setting(setting)

    gsheet.write_data(
        gsheet.open_sheet(gsheet.init_sheets(),
                          "Court_scraper_eviction_scheduler",
                          "eviction_scheduler"),
        gsheet.morning_afternoon(
            gsheet.combine_cols(
                gsheet.filter_df(
                    gsheet.filter_df(pd.DataFrame(pulled_settings),
                                     'setting_type', 'Eviction'),
                    'hearing_type', '(Hearing)|(Trial)'),
                ['case_number', 'setting_style'],
                'case_dets').drop_duplicates("case_number", keep="last")))
    json.dump(pulled_settings, outfile)
    return pulled_settings
コード例 #5
0
def parse_settings_on_cloud(afterdate, beforedate):
    """Scrape settings in the given date range, persist each one, and
    write eviction hearings/trials to the scheduler google sheet.

    Note: unlike the other variant, this one applies neither
    `morning_afternoon` nor case-number deduplication before writing.
    """
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    pulled_settings = make_setting_list(
        get_days_between_dates(afterdate=afterdate, beforedate=beforedate))
    for item in pulled_settings:
        persist.rest_setting(item)

    target = gsheet.open_sheet(gsheet.init_sheets(),
                               "Court_scraper_eviction_scheduler",
                               "eviction_scheduler")
    evictions = gsheet.filter_df(pd.DataFrame(pulled_settings),
                                 'setting_type', 'Eviction')
    hearings = gsheet.filter_df(evictions, 'hearing_type',
                                '(Hearing)|(Trial)')
    combined = gsheet.combine_cols(hearings,
                                   ['case_number', 'setting_style'],
                                   'case_dets')
    gsheet.write_data(target, combined)
コード例 #6
0
def parse_and_persist_settings(afterdate: str,
                               beforedate: str,
                               outfile: str,
                               showbrowser=False):
    """Scrape settings via `parse_settings`, persist each one, and write
    eviction hearings/trials to the scheduler google sheet.

    NOTE(review): `parse_settings` as defined above returns None, so the
    loop below would raise TypeError — confirm `parse_settings` returns
    the settings list before relying on this function.
    NOTE(review): `parse_settings` already persists each setting, writes
    the same google sheet, and JSON-dumps to `outfile`; this function
    then repeats all three steps — confirm the duplication is intended.
    NOTE(review): `outfile` is annotated `str` but is passed to
    `json.dump`, which expects a writable file object — confirm.
    """
    pulled_settings = parse_settings(afterdate, beforedate, outfile,
                                     showbrowser)
    for setting in pulled_settings:
        persist.rest_setting(setting)
    # Filter to eviction hearings/trials, combine case columns, dedupe
    # on case number (last wins), split morning/afternoon, then write.
    gsheet.write_data(
        gsheet.open_sheet(gsheet.init_sheets(),
                          "Court_scraper_eviction_scheduler",
                          "eviction_scheduler"),
        gsheet.morning_afternoon(
            gsheet.combine_cols(
                gsheet.filter_df(
                    gsheet.filter_df(pd.DataFrame(pulled_settings),
                                     'setting_type', 'Eviction'),
                    'hearing_type', '(Hearing)|(Trial)'),
                ['case_number', 'setting_style'],
                'case_dets').drop_duplicates("case_number", keep="last")))
    json.dump(pulled_settings, outfile)
コード例 #7
0
def dump_to_sheets(sheet, worksheet, tables, filter_evictions=False):
    """Dump the contents of the given database tables to a google sheet.

    Reads each table in `tables` into a DataFrame, outer-merges them all
    on 'case_number', optionally filters to eviction cases, and writes
    the result to `worksheet` inside `sheet`. Does nothing (beyond a log
    line) when the LOCAL_DEV environment variable is 'true'.

    Args:
        sheet: name of the google sheet to open.
        worksheet: name of the worksheet within the sheet.
        tables: iterable of database table names to read and merge.
        filter_evictions: when True, keep only rows whose 'case_type'
            matches 'Eviction'.
    """
    if os.getenv("LOCAL_DEV") != "true":
        sheet = gsheet.open_sheet(gsheet.init_sheets(), sheet, worksheet)
        # Open one connection for all tables (previously a new, never-closed
        # connection was opened per table). `local_dev` is presumably a
        # module-level flag — TODO confirm it is defined at module scope.
        conn = connect_to_database.get_database_connection(local_dev)
        dfs = []
        for table in tables:
            # NOTE: table names are interpolated into SQL; `tables` must
            # come from trusted code, never from user input.
            sql = "select * from " + table
            dfs.append(pd.read_sql_query(sql, conn))
            # TODO: decide whether cases with multiple events should be
            # grouped into one row per case number before merging.
        # Bug fix: the merge lambda previously referenced undefined names
        # (`vright`, `von`, `vhow`); use the real pd.merge keywords.
        df = reduce(
            lambda left, right: pd.merge(
                left, right, on='case_number', how='outer'), dfs)
        if filter_evictions:
            # Bug fix: the filtered frame was previously discarded.
            df = gsheet.filter_df(df, 'case_type', 'Eviction')
        gsheet.write_data(sheet, df)
    else:
        logger.info(
            "Not dumping to google sheets because LOCAL_DEV environment variable is 'true'."
        )