Example #1
def run_module(params) -> None:
    """
    Run entire hhs_facilities indicator.

    Parameters
    ----------
    params
        Dictionary containing indicator configuration. Expected to have the following structure:
        - "common":
            - "export_dir": str, directory to write output
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__,
        filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))

    raw_df = pull_data()
    gmpr = GeoMapper()
    filled_fips_df = fill_missing_fips(raw_df, gmpr)
    for geo, (sig_name, sig_cols, sig_func, sig_offset) in product(
            GEO_RESOLUTIONS, SIGNALS):
        mapped_df = convert_geo(filled_fips_df, geo, gmpr)
        output_df = generate_signal(mapped_df, sig_cols, sig_func, sig_offset)
        create_export_csv(output_df, params["common"]["export_dir"], geo,
                          sig_name)

    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds)
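For reference, a minimal call to this entry point might look like the sketch below; the directory value is a placeholder, and only "export_dir" is required per the docstring (the two log keys are optional):

# Hypothetical invocation of Example #1's run_module.
params = {
    "common": {
        "export_dir": "./receiving",  # directory create_export_csv writes to
        "log_filename": None,         # optional: file to write logs to
        "log_exceptions": True,       # optional: defaults to True in the code above
    }
}
run_module(params)

Example #2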
def run_module() -> None:
    """Run entire hhs_facilities indicator."""
    params = read_params()
    raw_df = pull_data()
    gmpr = GeoMapper()
    filled_fips_df = fill_missing_fips(raw_df, gmpr)
    for geo, (sig_name, sig_cols, sig_func, sig_offset) in product(
            GEO_RESOLUTIONS, SIGNALS):
        mapped_df = convert_geo(filled_fips_df, geo, gmpr)
        output_df = generate_signal(mapped_df, sig_cols, sig_func, sig_offset)
        create_export_csv(output_df, params["export_dir"], geo, sig_name)
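Note the difference from Example #1: this older variant takes no argument and loads a flat configuration via read_params() (which in delphi_utils typically reads a params.json file), so "export_dir" sits at the top level rather than under "common". A flat configuration matching this sketch (the value is a placeholder) would be:

# Hypothetical flat params as this entry point expects them.
params = {"export_dir": "./receiving"}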
Example #3
def run_module(params):
    """
    Generate ground truth HHS hospitalization data.

    Parameters
    ----------
    params
        Dictionary containing indicator configuration. Expected to have the following structure:
        - "common":
            - "export_dir": str, directory to write output
            - "log_filename" (optional): str, name of file to write logs
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__,
        filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))
    mapper = GeoMapper()
    request_all_states = ",".join(mapper.get_geo_values("state_id"))
    today = date.today()
    past_reference_day = date(year=2020, month=1, day=1)  # first available date in DB
    date_range = generate_date_ranges(past_reference_day, today)
    dfs = []
    for r in date_range:
        response = Epidata.covid_hosp(request_all_states, r)
        # The last date range might only have recent days without data yet, so
        # tolerate a "no results" response (result code -2) there instead of erroring.
        if response["result"] == -2 and r == date_range[-1]:
            continue
        if response["result"] != 1:
            raise Exception(f"Bad result from Epidata: {response['message']}")
        dfs.append(pd.DataFrame(response["epidata"]))
    all_columns = pd.concat(dfs)

    geo_mapper = GeoMapper()

    for sig in SIGNALS:
        state = geo_mapper.add_geocode(make_signal(all_columns, sig),
                                       "state_id",
                                       "state_code",
                                       from_col="state")
        for geo in GEOS:
            create_export_csv(make_geo(state, geo, geo_mapper),
                              params["common"]["export_dir"], geo, sig)

    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds)
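generate_date_ranges is not shown on this page. A plausible sketch (an assumption about its behavior, not the repo's implementation) chunks [start, today] into fixed-size windows using the Epidata client's range helper, which is why only the trailing window can legitimately come back empty:

from datetime import date, timedelta

from delphi_epidata import Epidata

def generate_date_ranges(start: date, end: date, step_days: int = 30):
    """Hypothetical helper: chunk [start, end] into Epidata date ranges."""
    ranges = []
    while start <= end:
        chunk_end = min(start + timedelta(days=step_days - 1), end)
        # Epidata.range builds the {"from": ..., "to": ...} dict the API expects.
        ranges.append(Epidata.range(int(start.strftime("%Y%m%d")),
                                    int(chunk_end.strftime("%Y%m%d"))))
        start = chunk_end + timedelta(days=1)
    return ranges

Example #4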
def process_window(df_list: List[pd.DataFrame],
                   signal_names: List[str],
                   geo_resolutions: List[str],
                   export_dir: str):
    """Process a list of input census block group-level data frames as a single data set and export.

    Assumes each data frame has _only_ one date of data.

    Parameters
    ----------
    df_list: List[pd.DataFrame]
        List of census block group-level data frames.
    signal_names: List[str]
        signal names to be processed
    geo_resolutions: List[str]
        List of geo resolutions to export the data.
    export_dir: str
        Path where the output files are saved.
    Returns
    -------
    None.  One file is written per (signal, resolution) pair containing the
    aggregated data from `df_list`.
    """
    for df in df_list:
        validate(df)
    date = date_from_timestamp(df_list[0].at[0, 'date_range_start'])
    cbg_df = pd.concat(construct_signals(df, signal_names) for df in df_list)
    for geo_res in geo_resolutions:
        aggregated_df = aggregate(cbg_df, signal_names, geo_res)
        for signal in signal_names:
            df_export = aggregated_df[
                ['geo_id']
                + [f'{signal}_{x}' for x in ('mean', 'se', 'n')]
            ].rename({
                f'{signal}_mean': 'val',
                f'{signal}_se': 'se',
                f'{signal}_n': 'sample_size',
            }, axis=1)
            df_export["timestamp"] = date.strftime('%Y%m%d')
            create_export_csv(df_export, export_dir, geo_res, signal)
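The rename above is the whole reshaping step: aggregate's wide per-signal columns become the fixed val/se/sample_size schema that create_export_csv expects. A toy frame (signal name and values are invented for illustration) makes the mapping concrete:

import pandas as pd

signal = "completely_home_prop"  # hypothetical signal name
aggregated_df = pd.DataFrame({
    "geo_id": ["01001"],
    f"{signal}_mean": [0.31],
    f"{signal}_se": [0.012],
    f"{signal}_n": [120],
})
df_export = aggregated_df.rename({
    f"{signal}_mean": "val",
    f"{signal}_se": "se",
    f"{signal}_n": "sample_size",
}, axis=1)
# df_export columns are now: geo_id, val, se, sample_size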
Example #5
def run_module(params):
    """
    Run the indicator.

    Parameters
    ----------
    params: Dict[str, Any]
        Nested dictionary of parameters. Expected to have the following structure:
        - "common":
            - "export_dir": str, directory to write output
            - "log_filename" (optional): str, name of file to write logs
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__,
        filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))
    mapper = GeoMapper()
    run_stats = []
    ## build the base version of the signal at the most detailed geo level you can get.
    ## compute stuff here or farm out to another function or file
    all_data = pd.DataFrame(
        columns=["timestamp", "val", "zip", "sample_size", "se"])
    ## aggregate & smooth
    ## TODO: add num/prop variations if needed
    for sensor, smoother, geo in product(SIGNALS, SMOOTHERS, GEOS):
        df = mapper.replace_geocode(all_data,
                                    "zip",
                                    geo,
                                    new_col="geo_id",
                                    date_col="timestamp")
        ## TODO: recompute sample_size, se here if not NA
        df["val"] = df[["geo_id", "val"]].groupby("geo_id")["val"].transform(
            smoother[0].smooth)
        sensor_name = sensor + smoother[1]  ## TODO: +num/prop variation if used
        # Don't export the first 6 days for smoothed signals since they'll be NaN.
        start_date = min(df.timestamp)
        if smoother[1]:
            start_date += timedelta(days=6)
        dates = create_export_csv(df,
                                  params["common"]["export_dir"],
                                  geo,
                                  sensor_name,
                                  start_date=start_date)
        if len(dates) > 0:
            run_stats.append((max(dates), len(dates)))
    ## log this indicator run
    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    min_max_date = run_stats and min(s[0] for s in run_stats)
    csv_export_count = sum(s[-1] for s in run_stats)
    max_lag_in_days = min_max_date and (datetime.now() - min_max_date).days
    formatted_min_max_date = min_max_date and min_max_date.strftime("%Y-%m-%d")
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds,
                csv_export_count=csv_export_count,
                max_lag_in_days=max_lag_in_days,
                oldest_final_export_date=formatted_min_max_date)
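The loop unpacks each SMOOTHERS entry as (smoother object, signal-name suffix), and the suffix doubles as the "is smoothed" flag for the start-date shift. Constants with that shape might look like the sketch below, built on delphi_utils' Smoother (the names and window length are assumptions, not the repo's definitions):

from delphi_utils import Smoother

# Hypothetical SMOOTHERS matching how the loop indexes them: element [0]
# smooths, element [1] is appended to the signal name and is truthy only
# for smoothed variants.
SMOOTHERS = [
    (Smoother("identity"), ""),
    (Smoother("moving_average", window_length=7), "_smoothed_7d"),
]

Example #6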
def run_module():
    """Generate ground truth HHS hospitalization data."""
    params = read_params()
    mapper = GeoMapper()
    request_all_states = ",".join(mapper.get_geo_values("state_id"))

    today = date.today()
    past_reference_day = date(year=2020, month=1, day=1)  # first available date in DB
    date_range = generate_date_ranges(past_reference_day, today)
    dfs = []
    for r in date_range:
        response = Epidata.covid_hosp(request_all_states, r)
        if response['result'] != 1:
            raise Exception(f"Bad result from Epidata: {response['message']}")
        dfs.append(pd.DataFrame(response['epidata']))
    all_columns = pd.concat(dfs)

    for sig in SIGNALS:
        create_export_csv(make_signal(all_columns, sig), params["export_dir"],
                          "state", sig)