def get_hospitalizations(session):
    """Returns a DataFrame of per-facility hospitalization stats."""

    cache_path = cached_path(session, f"{DATA_URL}:feather")
    if cache_path.exists():
        df = pandas.read_feather(cache_path)
    else:
        response = session.get(DATA_URL)
        response.raise_for_status()
        df = pandas.read_csv(
            io.StringIO(response.text),
            dtype={
                "hospital_pk": str,
                "fips_code": "Int64",
                "ccn": str,
            },
            parse_dates=["collection_week"],
            date_parser=lambda v: pandas.to_datetime(v, utc=True),
        )
        # Rows without a county FIPS code can't be joined to other data.
        df.dropna(subset=["fips_code"], inplace=True)
        df.reset_index(drop=True, inplace=True)
        # Write through a temp file so a failed run never leaves a bad cache.
        with temp_to_rename(cache_path) as temp_path:
            df.to_feather(temp_path)

    df.set_index(
        ["fips_code", "hospital_pk", "collection_week"],
        drop=True,
        inplace=True,
        verify_integrity=True,
    )
    df.sort_index(inplace=True)
    return df
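# cached_path() and temp_to_rename() come from the cache_policy module. The
# sketch below is an illustrative assumption of their contract, not the real
# implementation: the cache key maps to a stable file path, and writes go to
# a sibling temp file that is atomically renamed into place, so an
# interrupted run never leaves a truncated cache file. (The real helper
# apparently also accepts a mode= argument and then yields an open file
# instead of a path; see get_world below.)
import hashlib
import os
import tempfile
from contextlib import contextmanager
from pathlib import Path


def cached_path_sketch(session, key, cache_dir=Path("cache")):
    # Hash the key so URLs and other odd characters make a safe filename.
    return cache_dir / hashlib.sha256(key.encode()).hexdigest()


@contextmanager
def temp_to_rename_sketch(final_path):
    final_path.parent.mkdir(parents=True, exist_ok=True)
    fd, temp_name = tempfile.mkstemp(dir=final_path.parent)
    os.close(fd)  # the caller writes via the path, not this descriptor
    temp_path = Path(temp_name)
    try:
        yield temp_path
        temp_path.replace(final_path)  # atomic rename on POSIX
    except BaseException:
        temp_path.unlink()
        raise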
def get_vaccinations(session):
    """Returns a DataFrame of county-level vaccination stats."""

    cache_path = cached_path(session, f"{DATA_URL}:feather")
    if cache_path.exists():
        df = pandas.read_feather(cache_path)
    else:
        response = session.get(DATA_URL)
        response.raise_for_status()
        df = pandas.read_csv(
            io.StringIO(response.text),
            dtype={"FIPS": "Int64"},
            na_values=["UNK"],
        )
        df = df[[
            "Date",
            "FIPS",
            "Administered_Dose1_Recip",
            "Series_Complete_Yes",
            "Booster_Doses",
        ]]
        df.dropna(subset=["FIPS"], inplace=True)
        df.Date = pandas.to_datetime(df.Date, utc=True)
        df.reset_index(drop=True, inplace=True)
        with temp_to_rename(cache_path) as temp_path:
            df.to_feather(temp_path)

    df.set_index(["FIPS", "Date"], drop=True, inplace=True, verify_integrity=True)
    df.sort_index(inplace=True)
    return df
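# A minimal usage sketch for get_vaccinations(). The helper name and the
# FIPS code (6085, Santa Clara County, CA) are illustrative choices.
def example_latest_vaccinations(session, fips=6085):
    vax = get_vaccinations(session)
    county = vax.loc[fips]  # (FIPS, Date) index -> all dates for one county
    return county["Series_Complete_Yes"].iloc[-1]  # latest cumulative count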
def get_world(session, args):
    """Returns data organized into a tree rooted at a World region.
    Warnings are captured and printed; if any occur while combining
    data, a ValueError is raised."""

    cache_path = cache_policy.cached_path(session, _world_cache_key(args))
    if cache_path.exists():
        logging.info(f"Loading cached world: {cache_path}")
        with cache_path.open(mode="rb") as cache_file:
            return pickle.load(cache_file)

    with collecting_warnings(allow_regex=KNOWN_WARNINGS_REGEX) as warnings:
        world = _compute_world(session, args)
        if warnings:
            raise ValueError(f"{len(warnings)} warnings found combining data")

    logging.info(f"Saving cached world: {cache_path}")
    with cache_policy.temp_to_rename(cache_path, mode="wb") as cache_file:
        pickle.dump(world, cache_file)
    return world
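# collecting_warnings() is assumed to be a context manager that exposes, as
# a live list, every warning whose message does not match allow_regex; that
# lets the code above check the list while still inside the with-block. A
# minimal sketch of that contract (names here are illustrative):
import re
import warnings as _warnings
from contextlib import contextmanager


@contextmanager
def collecting_warnings_sketch(allow_regex=None):
    collected = []

    def capture(message, category, filename, lineno, file=None, line=None):
        text = str(message)
        print(f"WARNING: {text}")  # always surface the warning
        if allow_regex is None or not re.search(allow_regex, text):
            collected.append(text)  # only unexpected warnings count

    with _warnings.catch_warnings():
        _warnings.simplefilter("always")
        _warnings.showwarning = capture  # restored when the block exits
        yield collected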
def get_covid(session):
    """Returns a DataFrame of COVID-19 daily records."""

    cache_path = cached_path(session, f"{REPORTS_URL}:feather")
    if cache_path.exists():
        df = pandas.read_feather(cache_path)
    else:
        # Fetch one CSV per day from the first JHU report onward; a 404
        # just means that day's report doesn't exist (yet).
        reports = []
        start_day = pandas.Timestamp("01-22-2020", tz="UTC")
        days = pandas.date_range(start_day, pandas.Timestamp.now(tz="UTC"))
        for day in days:
            mm_dd_yyyy = day.strftime("%m-%d-%Y")
            response = session.get(f"{REPORTS_URL}/{mm_dd_yyyy}.csv")
            if response.status_code != 404:
                response.raise_for_status()
                data = io.StringIO(response.text)
                rep = pandas.read_csv(data, na_values="", keep_default_na=False)
                rep.drop(
                    columns=[
                        "Active",
                        "Case-Fatality_Ratio",
                        "Case_Fatality_Ratio",
                        "Combined_Key",
                        "FIPS",
                        "Incident_Rate",
                        "Incidence_Rate",
                        "Lat",
                        "Long_",
                        "Latitude",
                        "Longitude",
                        "Recovered",
                    ],
                    inplace=True,
                    errors="ignore",
                )
                # Column names changed over time; normalize to one schema.
                rep.rename(
                    columns={
                        "Country/Region": "Country_Region",
                        "Last Update": "Last_Update",
                        "Province/State": "Province_State",
                    },
                    inplace=True,
                )
                rep["Date"] = day
                rep.Last_Update = pandas.to_datetime(rep.Last_Update, utc=True)
                reports.append(rep)

        place_cols = ["Country_Region", "Province_State", "Admin2"]
        ids = get_places(session).reset_index().set_index(place_cols)[["ID"]]

        df = pandas.concat(reports, ignore_index=True)
        for str_col in df.select_dtypes("object").columns:
            df[str_col].fillna("", inplace=True)
        df = df.merge(ids, left_on=place_cols, right_index=True)
        df.drop(columns=place_cols, inplace=True)
        df.reset_index(drop=True, inplace=True)
        with temp_to_rename(cache_path) as temp_path:
            df.to_feather(temp_path)

    # Some days have duplicate rows per place; keep the first of each.
    df = df.groupby(["ID", "Date"], as_index=False).first()
    df.set_index(["ID", "Date"], inplace=True, verify_integrity=True)
    return df
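# A minimal usage sketch: the JHU daily reports carry cumulative totals, so
# per-day increments come from a groupby-diff. Assumes a Confirmed column
# (it is not among the columns dropped above); the helper name is illustrative.
def example_daily_new_cases(session):
    covid = get_covid(session)  # indexed by (ID, Date)
    new_cases = covid.groupby(level="ID")["Confirmed"].diff()
    return new_cases.dropna()  # each place's first day has no prior value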
import argparse

from covid import cache_policy
from covid import logging_policy  # noqa

parser = argparse.ArgumentParser(parents=[cache_policy.argument_parser])
parser.add_argument("--id", type=int)
args = parser.parse_args()
session = cache_policy.new_session(args)

print("Loading places...")
places = get_places(session)
places.info()
print()

print("Cache:", cached_path(session, f"{REPORTS_URL}:feather"))
print("Loading COVID data...")
covid = get_covid(session)
covid.info()
print()

if args.id:
    print(f"=== PLACE ID={args.id} ===")
    place = places.loc[args.id]
    print(place)
    print()
    print(f"=== COVID ID={args.id} ===")
    print(covid.loc[args.id])
else:
def credits():
    return {"https://healthdata.gov/": "HealthData.gov"}


if __name__ == "__main__":
    import argparse

    from covid import cache_policy
    from covid import logging_policy  # noqa

    parser = argparse.ArgumentParser(parents=[cache_policy.argument_parser])
    args = parser.parse_args()
    session = cache_policy.new_session(args)

    print("Cache:", cached_path(session, f"{DATA_URL}:feather"))
    print("Loading hospitalizations...")
    df = get_hospitalizations(session)
    df.info(verbose=True, show_counts=True)
    print()

    print("=== FACILITIES ===")
    key_cols = ["fips_code", "hospital_pk"]
    for (fips, pk), rows in df.groupby(level=key_cols):
        last = rows.iloc[-1]
        print(f"--- fips={fips} pk={pk} t={last.name[-1]} ---")
        for key, val in last.items():  # iteritems() was removed in pandas 2.x
            print(key)
            print(f"  {val}")
        print()