def run_module(params) -> None:
    """
    Run entire hhs_facilities indicator.

    Parameters
    ----------
    params
        Dictionary containing indicator configuration. Expected to have the following structure:
        - "common":
            - "export_dir": str, directory to write output
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__, filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))
    raw_df = pull_data()
    gmpr = GeoMapper()
    filled_fips_df = fill_missing_fips(raw_df, gmpr)
    for geo, (sig_name, sig_cols, sig_func, sig_offset) in product(GEO_RESOLUTIONS, SIGNALS):
        mapped_df = convert_geo(filled_fips_df, geo, gmpr)
        output_df = generate_signal(mapped_df, sig_cols, sig_func, sig_offset)
        create_export_csv(output_df, params["common"]["export_dir"], geo, sig_name)

    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds)
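# A minimal usage sketch for the function above, assuming it is invoked directly
# with a hand-built params dict mirroring the docstring; the export directory and
# log filename below are placeholders, not the indicator's real configuration.
if __name__ == "__main__":
    run_module({
        "common": {
            "export_dir": "./receiving",           # where output CSVs are written
            "log_filename": "hhs_facilities.log",  # optional
        }
    })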
def run_module() -> None:
    """Run entire hhs_facilities indicator."""
    params = read_params()
    raw_df = pull_data()
    gmpr = GeoMapper()
    filled_fips_df = fill_missing_fips(raw_df, gmpr)
    for geo, (sig_name, sig_cols, sig_func, sig_offset) in product(GEO_RESOLUTIONS, SIGNALS):
        mapped_df = convert_geo(filled_fips_df, geo, gmpr)
        output_df = generate_signal(mapped_df, sig_cols, sig_func, sig_offset)
        create_export_csv(output_df, params["export_dir"], geo, sig_name)
def run_module(params):
    """
    Generate ground truth HHS hospitalization data.

    Parameters
    ----------
    params
        Dictionary containing indicator configuration. Expected to have the following structure:
        - "common":
            - "export_dir": str, directory to write output
            - "log_filename" (optional): str, name of file to write logs
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__, filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))
    mapper = GeoMapper()
    request_all_states = ",".join(mapper.get_geo_values("state_id"))
    today = date.today()
    past_reference_day = date(year=2020, month=1, day=1)  # first available date in DB
    date_range = generate_date_ranges(past_reference_day, today)
    dfs = []
    for r in date_range:
        response = Epidata.covid_hosp(request_all_states, r)
        # The last date range might only have recent days that don't have any data, so don't error.
        if response["result"] != 1 and r != date_range[-1]:
            raise Exception(f"Bad result from Epidata: {response['message']}")
        if response["result"] == -2 and r == date_range[-1]:  # -2 code means no results
            continue
        dfs.append(pd.DataFrame(response['epidata']))
    all_columns = pd.concat(dfs)
    geo_mapper = GeoMapper()
    for sig in SIGNALS:
        state = geo_mapper.add_geocode(make_signal(all_columns, sig),
                                       "state_id",
                                       "state_code",
                                       from_col="state")
        for geo in GEOS:
            create_export_csv(make_geo(state, geo, geo_mapper),
                              params["common"]["export_dir"], geo, sig)

    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds)
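# Hypothetical sketch of the date batching assumed above: generate_date_ranges is
# expected to return a list of Epidata-style range dicts covering
# [past_reference_day, today]. The helper name, chunk size, and use of
# Epidata.range here are assumptions for illustration, not the indicator's actual
# implementation.
from datetime import date, timedelta
from delphi_epidata import Epidata

def generate_date_ranges_sketch(start: date, end: date, chunk_days: int = 30):
    """Split [start, end] into Epidata range dicts of at most chunk_days days each."""
    ranges = []
    while start <= end:
        chunk_end = min(start + timedelta(days=chunk_days - 1), end)
        ranges.append(Epidata.range(int(start.strftime("%Y%m%d")),
                                    int(chunk_end.strftime("%Y%m%d"))))
        start = chunk_end + timedelta(days=1)
    return ranges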
def process_window(df_list: List[pd.DataFrame],
                   signal_names: List[str],
                   geo_resolutions: List[str],
                   export_dir: str):
    """Process a list of input census block group-level data frames as a single data set and export.

    Assumes each data frame has _only_ one date of data.

    Parameters
    ----------
    df_list: List[pd.DataFrame]
        list of census block group-level frames.
    signal_names: List[str]
        signal names to be processed
    geo_resolutions: List[str]
        list of geo resolutions to export the data.
    export_dir: str
        path where the output files are saved

    Returns
    -------
    None. One file is written per (signal, resolution) pair containing the
    aggregated data from `df_list`.
    """
    for df in df_list:
        validate(df)
    date = date_from_timestamp(df_list[0].at[0, 'date_range_start'])
    cbg_df = pd.concat(construct_signals(df, signal_names) for df in df_list)
    for geo_res in geo_resolutions:
        aggregated_df = aggregate(cbg_df, signal_names, geo_res)
        for signal in signal_names:
            df_export = aggregated_df[
                ['geo_id'] + [f'{signal}_{x}' for x in ('mean', 'se', 'n')]
            ].rename({
                f'{signal}_mean': 'val',
                f'{signal}_se': 'se',
                f'{signal}_n': 'sample_size',
            }, axis=1)
            df_export["timestamp"] = date.strftime('%Y%m%d')
            create_export_csv(df_export, export_dir, geo_res, signal)
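# Hypothetical driver for process_window, assuming one census block group-level
# CSV per day; the file names, signal names, and geo resolutions are placeholders
# for illustration only.
import pandas as pd

daily_files = [
    "2020-06-12-social-distancing.csv.gz",
    "2020-06-13-social-distancing.csv.gz",
]
df_list = [pd.read_csv(path) for path in daily_files]
process_window(df_list,
               signal_names=["completely_home_prop", "full_time_work_prop"],
               geo_resolutions=["county", "state"],
               export_dir="./receiving")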
def run_module(params):
    """
    Run the indicator.

    Arguments
    ---------
    params: Dict[str, Any]
        Nested dictionary of parameters.
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__, filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))
    mapper = GeoMapper()
    run_stats = []
    ## build the base version of the signal at the most detailed geo level you can get.
    ## compute stuff here or farm out to another function or file
    all_data = pd.DataFrame(columns=["timestamp", "val", "zip", "sample_size", "se"])

    ## aggregate & smooth
    ## TODO: add num/prop variations if needed
    for sensor, smoother, geo in product(SIGNALS, SMOOTHERS, GEOS):
        df = mapper.replace_geocode(all_data, "zip", geo,
                                    new_col="geo_id",
                                    date_col="timestamp")
        ## TODO: recompute sample_size, se here if not NA
        df["val"] = df[["geo_id", "val"]].groupby("geo_id")["val"].transform(
            smoother[0].smooth)
        sensor_name = sensor + smoother[1]  ## TODO: +num/prop variation if used
        # don't export first 6 days for smoothed signals since they'll be nan.
        start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp)
        dates = create_export_csv(df,
                                  params["common"]["export_dir"],
                                  geo,
                                  sensor_name,
                                  start_date=start_date)
        if len(dates) > 0:
            run_stats.append((max(dates), len(dates)))

    ## log this indicator run
    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    min_max_date = run_stats and min(s[0] for s in run_stats)
    csv_export_count = sum(s[-1] for s in run_stats)
    max_lag_in_days = min_max_date and (datetime.now() - min_max_date).days
    formatted_min_max_date = min_max_date and min_max_date.strftime("%Y-%m-%d")
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds,
                csv_export_count=csv_export_count,
                max_lag_in_days=max_lag_in_days,
                oldest_final_export_date=formatted_min_max_date)
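# Illustrative only: one possible shape for the SIGNALS, SMOOTHERS, and GEOS
# constants the loop above iterates over; the names and values are placeholders,
# not the indicator's real constants. Each smoother is a (smoother_object,
# name_suffix) pair, and the raw variant's empty suffix is what skips the 6-day
# trim on start_date.
from delphi_utils import Smoother

SIGNALS = ["example_signal"]  # base signal names (placeholder)
SMOOTHERS = [
    (Smoother("identity", impute_method=None), ""),         # raw values
    (Smoother("moving_average", window_length=7), "_7dav"),  # 7-day average
]
GEOS = ["hrr", "msa"]  # geocodes the zip-level data is aggregated to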
def run_module():
    """Generate ground truth HHS hospitalization data."""
    params = read_params()
    mapper = GeoMapper()
    request_all_states = ",".join(mapper.get_geo_values("state_id"))
    today = date.today()
    past_reference_day = date(year=2020, month=1, day=1)  # first available date in DB
    date_range = generate_date_ranges(past_reference_day, today)
    dfs = []
    for r in date_range:
        response = Epidata.covid_hosp(request_all_states, r)
        if response['result'] != 1:
            raise Exception(f"Bad result from Epidata: {response['message']}")
        dfs.append(pd.DataFrame(response['epidata']))
    all_columns = pd.concat(dfs)
    for sig in SIGNALS:
        create_export_csv(make_signal(all_columns, sig), params["export_dir"],
                          "state", sig)