def run_module(params):
    """
    Generate ground truth HHS hospitalization data.

    Parameters
    ----------
    params
        Dictionary containing indicator configuration. Expected to have the following structure:
        - "common":
            - "export_dir": str, directory to write output
            - "log_filename" (optional): str, name of file to write logs
            - "log_exceptions" (optional): passed to the structured logger, defaults to True

    Raises
    ------
    Exception
        If Epidata returns a non-success result for any date range, except for
        a "no results" (-2) response on the final (most recent) date range.
    """
    start_time = time.time()
    logger = get_structured_logger(
        __name__, filename=params["common"].get("log_filename"),
        log_exceptions=params["common"].get("log_exceptions", True))

    # A single mapper serves both the state list lookup and the geocoding below.
    geo_mapper = GeoMapper()
    request_all_states = ",".join(geo_mapper.get_geo_values("state_id"))

    today = date.today()
    past_reference_day = date(year=2020, month=1, day=1)  # first available date in DB
    date_range = generate_date_ranges(past_reference_day, today)

    dfs = []
    for r in date_range:
        response = Epidata.covid_hosp(request_all_states, r)
        result = response["result"]
        if result == 1:
            dfs.append(pd.DataFrame(response["epidata"]))
            continue
        # The last date range might only have recent days that don't have any
        # data, so don't error on a "no results" (-2) response there.
        if result == -2 and r == date_range[-1]:
            continue
        # Every other non-success code is an error, including on the last
        # range: falling through would read a missing or truncated 'epidata'.
        raise Exception(f"Bad result from Epidata: {response['message']}")
    all_columns = pd.concat(dfs)

    export_dir = params["common"]["export_dir"]
    for sig in SIGNALS:
        state = geo_mapper.add_geocode(make_signal(all_columns, sig),
                                       "state_id", "state_code",
                                       from_col="state")
        for geo in GEOS:
            create_export_csv(make_geo(state, geo, geo_mapper),
                              export_dir,
                              geo,
                              sig)

    elapsed_time_in_seconds = round(time.time() - start_time, 2)
    logger.info("Completed indicator run",
                elapsed_time_in_seconds=elapsed_time_in_seconds)
def test_good_file(self):
    """Check the output columns, NY/NYC aggregation, and per-state missing values.

    The pulled frame is compared against the raw fixture CSV: for each metric,
    the set of timestamps with a null value must match between the two.
    """
    df = pull_nchs_mortality_data(TOKEN, "test_data.csv")

    # Test columns. A list comparison also fails cleanly when the number of
    # columns differs, unlike an elementwise ndarray comparison.
    expected_columns = [
        'covid_19_deaths', 'total_deaths', 'percent_of_expected_deaths',
        'pneumonia_deaths', 'pneumonia_and_covid_19_deaths',
        'influenza_deaths', 'pneumonia_influenza_or_covid_19_deaths',
        "timestamp", "geo_id", "population",
    ]
    assert list(df.columns) == expected_columns

    # Test aggregation for NYC and NY: the combined "ny" value is null only
    # on timestamps where both the NY and the NYC raw rows are null.
    raw_df = pd.read_csv("./test_data/test_data.csv", parse_dates=["start_week"])
    raw_df = standardize_columns(raw_df)
    for metric in METRICS:
        ny_list = raw_df.loc[(raw_df["state"] == "New York")
                             & (raw_df[metric].isnull()), "timestamp"].values
        nyc_list = raw_df.loc[(raw_df["state"] == "New York City")
                              & (raw_df[metric].isnull()), "timestamp"].values
        final_list = df.loc[(df["geo_id"] == "ny")
                            & (df[metric].isnull()), "timestamp"].values
        assert set(final_list) == set(ny_list).intersection(set(nyc_list))

    # Test missing value.
    # Build an explicit state_id -> state_name table and iterate over its row
    # values. Iterating (or zipping) DataFrames directly yields their column
    # labels, which would make this loop run once on meaningless labels and
    # compare two empty sets.
    gmpr = GeoMapper()
    state_ids = pd.DataFrame({"state_id": list(gmpr.get_geo_values("state_id"))})
    state_map = gmpr.add_geocode(state_ids, "state_id", "state_name",
                                 from_col="state_id")
    aggregated_states = {"New York", "New York City"}  # covered by the check above
    for state, geo_id in zip(state_map["state_name"], state_map["state_id"]):
        if state in aggregated_states:
            continue
        for metric in METRICS:
            test_list = raw_df.loc[(raw_df["state"] == state)
                                   & (raw_df[metric].isnull()), "timestamp"].values
            final_list = df.loc[(df["geo_id"] == geo_id)
                                & (df[metric].isnull()), "timestamp"].values
            assert set(final_list) == set(test_list)
def run_module():
    """Generate ground truth HHS hospitalization data.

    Reads the run parameters, requests hospitalization data for all states
    from Epidata one date range at a time, and exports one state-level CSV
    per signal into ``params["export_dir"]``.

    Raises
    ------
    Exception
        If Epidata returns a non-success result for any date range, except for
        a "no results" (-2) response on the final (most recent) date range.
    """
    params = read_params()
    mapper = GeoMapper()
    request_all_states = ",".join(mapper.get_geo_values("state_id"))

    today = date.today()
    past_reference_day = date(year=2020, month=1, day=1)  # first available date in DB
    # Materialize so the final range can be identified by position.
    date_range = list(generate_date_ranges(past_reference_day, today))
    last_index = len(date_range) - 1

    dfs = []
    for index, r in enumerate(date_range):
        response = Epidata.covid_hosp(request_all_states, r)
        if response['result'] == 1:
            dfs.append(pd.DataFrame(response['epidata']))
            continue
        # The last date range might only contain recent days that don't have
        # any data yet; a "no results" (-2) response there is not an error.
        if response['result'] == -2 and index == last_index:
            continue
        raise Exception(f"Bad result from Epidata: {response['message']}")
    all_columns = pd.concat(dfs)

    for sig in SIGNALS:
        create_export_csv(
            make_signal(all_columns, sig),
            params["export_dir"],
            "state",
            sig
        )