# Example 1
def get_hospitalizations(session):
    """Returns a DataFrame of per-facility hospitalization stats.

    Results are cached as a feather file keyed on DATA_URL; on a cache
    miss the CSV is downloaded, cleaned, and written through the cache.
    The returned frame is indexed and sorted by
    (fips_code, hospital_pk, collection_week).
    """

    cache_path = cached_path(session, f"{DATA_URL}:feather")
    if cache_path.exists():
        df = pandas.read_feather(cache_path)
    else:
        response = session.get(DATA_URL)
        response.raise_for_status()
        df = pandas.read_csv(
            io.StringIO(response.text),
            dtype={
                "hospital_pk": str,
                "fips_code": "Int64",
                "ccn": str,
            },
        )
        # Parse timestamps explicitly after reading: the read_csv
        # date_parser= argument is deprecated since pandas 2.0 and
        # removed in pandas 3.0.
        df["collection_week"] = pandas.to_datetime(
            df["collection_week"], utc=True
        )

        df.dropna(subset=["fips_code"], inplace=True)
        df.reset_index(drop=True, inplace=True)
        # Write to a temp file then rename, so readers never observe a
        # partially-written cache entry.
        with temp_to_rename(cache_path) as temp_path:
            df.to_feather(temp_path)

    df.set_index(
        ["fips_code", "hospital_pk", "collection_week"],
        drop=True,
        inplace=True,
        verify_integrity=True,  # fail loudly on duplicate facility/week rows
    )
    df.sort_index(inplace=True)
    return df
# Example 2
def get_vaccinations(session):
    """Returns a DataFrame of county-level vaccination stats.

    Results are cached as a feather file keyed on DATA_URL; on a cache
    miss the CSV is downloaded, trimmed to the needed columns, and
    written through the cache.  The returned frame is indexed and
    sorted by (FIPS, Date).
    """

    cache_path = cached_path(session, f"{DATA_URL}:feather")
    if cache_path.exists():
        df = pandas.read_feather(cache_path)
    else:
        response = session.get(DATA_URL)
        response.raise_for_status()
        df = pandas.read_csv(
            io.StringIO(response.text),
            dtype={"FIPS": "Int64"},
            na_values=["UNK"],  # source uses "UNK" for unknown counties
        )

        # Take an explicit copy of the needed columns; mutating a bare
        # column selection triggers SettingWithCopyWarning and is
        # unreliable under pandas copy-on-write.
        df = df[[
            "Date",
            "FIPS",
            "Administered_Dose1_Recip",
            "Series_Complete_Yes",
            "Booster_Doses",
        ]].copy()

        df.dropna(subset=["FIPS"], inplace=True)
        # Use item assignment (not attribute assignment) to replace a
        # column; attribute assignment is discouraged for this purpose.
        df["Date"] = pandas.to_datetime(df["Date"], utc=True)
        df.reset_index(drop=True, inplace=True)
        # Write to a temp file then rename, so readers never observe a
        # partially-written cache entry.
        with temp_to_rename(cache_path) as temp_path:
            df.to_feather(temp_path)

    df.set_index(["FIPS", "Date"],
                 drop=True,
                 inplace=True,
                 verify_integrity=True)
    df.sort_index(inplace=True)
    return df
# Example 3
def get_world(session, args):
    """Returns data organized into a tree rooted at a World region.
    Warnings are captured and printed, then raise a ValueError exception."""

    path = cache_policy.cached_path(session, _world_cache_key(args))
    if path.exists():
        logging.info(f"Loading cached world: {path}")
        with path.open(mode="rb") as f:
            return pickle.load(f)

    # Compute from scratch, treating any unexpected warning as fatal.
    with collecting_warnings(allow_regex=KNOWN_WARNINGS_REGEX) as captured:
        world = _compute_world(session, args)
        if captured:
            raise ValueError(f"{len(captured)} warnings found combining data")

    logging.info(f"Saving cached world: {path}")
    with cache_policy.temp_to_rename(path, mode="wb") as f:
        pickle.dump(world, f)
    return world
# Example 4
def get_covid(session):
    """Returns a DataFrame of COVID-19 daily records.

    On a cache miss, downloads one daily-report CSV per day starting
    2020-01-22 (skipping days that 404), normalizes columns across
    report format changes, joins in place IDs from get_places(), and
    caches the result as feather.  The returned frame is indexed by
    (ID, Date).
    """

    cache_path = cached_path(session, f"{REPORTS_URL}:feather")
    if cache_path.exists():
        df = pandas.read_feather(cache_path)
    else:
        reports = []
        start_day = pandas.Timestamp("01-22-2020", tz="UTC")
        days = pandas.date_range(start_day, pandas.Timestamp.now(tz="UTC"))
        for day in days:
            mm_dd_yyyy = day.strftime("%m-%d-%Y")
            response = session.get(f"{REPORTS_URL}/{mm_dd_yyyy}.csv")
            # Some days have no report file; skip 404s but fail on any
            # other HTTP error.
            if response.status_code != 404:
                response.raise_for_status()
                data = io.StringIO(response.text)
                rep = pandas.read_csv(data, na_values="", keep_default_na=False)
                # Drop columns we never use; names varied across report
                # versions, so errors="ignore" tolerates absent ones.
                rep.drop(
                    columns=[
                        "Active",
                        "Case-Fatality_Ratio",
                        "Case_Fatality_Ratio",
                        "Combined_Key",
                        "FIPS",
                        "Incident_Rate",
                        "Incidence_Rate",
                        "Lat",
                        "Long_",
                        "Latitude",
                        "Longitude",
                        "Recovered",
                    ],
                    inplace=True,
                    errors="ignore",
                )
                # Unify column names across old and new report formats.
                rep.rename(
                    columns={
                        "Country/Region": "Country_Region",
                        "Last Update": "Last_Update",
                        "Province/State": "Province_State",
                    },
                    inplace=True,
                )

                rep["Date"] = day
                # Item assignment (not attribute assignment) is the
                # supported way to replace an existing column.
                rep["Last_Update"] = pandas.to_datetime(
                    rep["Last_Update"], utc=True
                )
                reports.append(rep)

        place_cols = ["Country_Region", "Province_State", "Admin2"]
        ids = get_places(session).reset_index().set_index(place_cols)[["ID"]]

        df = pandas.concat(reports, ignore_index=True)
        # Plain column assignment instead of chained inplace fillna,
        # which is deprecated (pandas 2.1) and broken under
        # copy-on-write semantics.
        for str_col in df.select_dtypes("object").columns:
            df[str_col] = df[str_col].fillna("")

        df = df.merge(ids, left_on=place_cols, right_index=True)
        df.drop(columns=place_cols, inplace=True)
        df.reset_index(drop=True, inplace=True)
        # Write to a temp file then rename, so readers never observe a
        # partially-written cache entry.
        with temp_to_rename(cache_path) as temp_path:
            df.to_feather(temp_path)

    # Some reports repeat (ID, Date) pairs; keep the first occurrence
    # so the index below can be verified unique.
    df = df.groupby(["ID", "Date"], as_index=False).first()
    df.set_index(["ID", "Date"], inplace=True, verify_integrity=True)
    return df
# Example 5
    import argparse

    from covid import cache_policy
    from covid import logging_policy  # noqa

    parser = argparse.ArgumentParser(parents=[cache_policy.argument_parser])
    parser.add_argument("--id", type=int)
    args = parser.parse_args()
    session = cache_policy.new_session(args)

    print("Loading places...")
    places = get_places(session)
    places.info()
    print()

    print("Cache:", cached_path(session, f"{REPORTS_URL}:feather"))
    print("Loading COVID data...")
    covid = get_covid(session)
    covid.info()
    print()

    if args.id:
        print(f"=== PLACE ID={args.id} ===")
        place = places.loc[args.id]
        print(place)
        print()

        print(f"=== COVID ID={args.id} ===")
        print(covid.loc[args.id])

    else:
# Example 6
def credits():
    """Returns a mapping from source URL to display name for attribution."""
    sources = {"https://healthdata.gov/": "HealthData.gov"}
    return sources


if __name__ == "__main__":
    import argparse

    from covid import cache_policy
    from covid import logging_policy  # noqa

    parser = argparse.ArgumentParser(parents=[cache_policy.argument_parser])
    args = parser.parse_args()
    session = cache_policy.new_session(args)

    print("Cache:", cached_path(session, f"{DATA_URL}:feather"))
    print("Loading hospitalizations...")
    df = get_hospitalizations(session)
    df.info(verbose=True, show_counts=True)
    print()

    print("=== FACILITIES ===")
    key_cols = ["fips_code", "hospital_pk"]
    for (fips, pk), rows in df.groupby(level=key_cols):
        last = rows.iloc[-1]
        print(f"--- fips={fips} pk={pk} t={last.name[-1]} ---")
        for key, val in last.iteritems():
            print(key)
            print(f"    {val}")
        print()