Ejemplo n.º 1
0
def get_country_stats(country_alpha: str, metric_type: str) -> pd.DataFrame:
    """Return the per-date totals of one metric for a single country.

    The global Johns Hopkins CSV for ``metric_type`` is loaded through
    ``parse_df``, the rows whose "Country/Region" equals the looked-up
    country name are kept, and the remaining columns are summed across
    those rows.

    :param: :country_alpha: :str: country alpha2 code (case-insensitive).
    :param: :metric_type: :str: currently only confirmed or death supported
    :return: :pd.DataFrame: two columns, "Date" and ``metric_type.title()``.
    :raises ValueError: if the code is not a key of ``country_dict``.
    :raises DataReadingError: if ``parse_df`` fails for any reason.
    """
    country_alpha = country_alpha.upper()
    metric_type = metric_type.lower()

    # Validate the code first so a bad code fails without a download.
    if country_alpha not in country_dict:
        raise ValueError(f"{country_alpha} not found in our dictionary.")
    country = country_dict[country_alpha]

    try:
        df = parse_df(metric_type=metric_type)
    except Exception as ex:
        # Keep the original cause attached for debugging.
        raise DataReadingError("error accessing country data") from ex

    df = df[df["Country/Region"] == country]
    df = df.drop(columns=["Lat", "Long", "Country/Region", "Province/State"])

    # Summing over rows leaves a Series indexed by the former column labels;
    # reset_index() turns it into the columns "index" and 0.
    totals = df.sum(axis=0).to_frame().reset_index()
    return totals.rename(columns={"index": "Date", 0: metric_type.title()})
Ejemplo n.º 2
0
def read_county_stats(state: str, county: str) -> Dict:
    """Return the county-level record(s) for a state/county pair.

    Reads the county table from ``app_config.COUNTY_URL`` and overrides the
    ``new_death`` value of the first matching row with the last day-over-day
    difference computed from ``app_config.STATE_DEATH``.

    :param: :state: :str: key of ``reverse_states_map``.
    :param: :county: :str: county name as it appears in both data sources.
    :return: result of ``DataFrame.to_dict(orient="records")``, i.e. a list
        of per-row dictionaries (the ``Dict`` annotation is kept as-is).
    :raises DataReadingError: if either CSV cannot be read.
    :raises DataValidationError: if the state/county pair cannot be resolved
        or any step of the filtering fails.
    """
    try:
        df = pd.read_csv(app_config.COUNTY_URL)
        deaths = pd.read_csv(app_config.STATE_DEATH)
    except Exception as ex:
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from ex

    try:
        # Normalise headers: lower-case, spaces replaced by underscores.
        df.columns = df.columns.str.lower().str.replace(" ", "_", regex=False)

        state_name = reverse_states_map[state]

        # used data source 2 for new death number
        deaths = deaths[deaths['Province_State'] == state_name]
        deaths = deaths[deaths['Admin2'] == county]
        # NOTE(review): presumably columns 12+ are the per-date columns;
        # confirm against the STATE_DEATH file layout.
        new_death = deaths.iloc[:, 12:].diff(axis=1).iloc[:, -1].values[0]

        df = df[df["state_name"] == state_name]
        # .copy() gives an independent frame so the write below is not a
        # chained assignment on a slice of the original table.
        df = df[df["county_name"] == county].copy()
        if len(df) == 0:
            raise DataValidationError("county.py len(df) == 0")
        df.iloc[0, df.columns.get_loc("new_death")] = new_death

        records = df.to_dict(orient="records")
    except Exception as ex:
        raise DataValidationError(
            f"Can't find State: {state}, and County: {county} combination."
        ) from ex
    return records
Ejemplo n.º 3
0
def read_county_stats(state: str, county: str) -> Dict:
    """Return the county-level record(s) for a state/county pair.

    The county table is loaded through ``ingest_county_data``, narrowed to
    the state, and then narrowed to the county. For the codes listed in
    ``territories`` no county filter is applied; instead the first row's
    ``county_name`` is overwritten with the full state name.

    :param: :state: :str: key of ``reverse_states_map``.
    :param: :county: :str: county name.
    :return: result of ``DataFrame.to_dict(orient="records")``, i.e. a list
        of per-row dictionaries (the ``Dict`` annotation is kept as-is).
    :raises DataReadingError: if the table cannot be loaded, or the state is
        unknown / has no rows.
    :raises DataValidationError: if no row matches the county.
    """
    try:
        df = ingest_county_data(url=app_config.COUNTY_URL)
    except Exception as ex:
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from ex

    # 2020-04-22 patch counties: these pairs are looked up under a combined
    # name (presumably how the data source reports them - confirm).
    if state == "WA" and county in ("Benton", "Franklin"):
        county = "Benton and Franklin"

    if state == "MA" and county in ("Dukes", "Nantucket"):
        county = "Dukes and Nantucket"

    # 2020-04-26 patch territories and districts
    territories = ("DC", "GU", "AS", "PR", "MP")

    # Fetch state data. full_state_name starts as the raw code so the error
    # message below is still meaningful when the map lookup itself fails.
    full_state_name = state
    try:
        full_state_name = reverse_states_map[state]
        df = df[df["state_name"] == full_state_name]
        if len(df) == 0:
            raise DataValidationError(
                f"No records found for {full_state_name} in our database.")
    except Exception as ex:
        raise DataReadingError(
            f"Can't find {full_state_name} in our database.") from ex

    # Now fetch county data
    try:
        if state in territories:
            df = df.reset_index(drop=True)
            df.loc[0, "county_name"] = full_state_name
        else:
            df = df[df["county_name"] == county]
        if len(df) == 0:
            raise DataValidationError(
                f"No records found for {full_state_name} in our database.")
    except Exception as ex:
        raise DataValidationError(
            f"Can't find State: {full_state_name},"
            f" and County: {county} combination.") from ex

    return df.to_dict(orient="records")
Ejemplo n.º 4
0
def read_county_stats_zip_ny(zipcode: str) -> Dict:
    """Return stats for New York State zip_codes.

    The zip code is resolved to a county and state with
    ``zipcodes.matching``; cumulative and latest day-over-day figures are
    then read from ``app_config.STATE_CONFIRMED`` and
    ``app_config.STATE_DEATH``.

    :param: :zipcode: :str: zip code to look up (converted with ``str``).
    :return: :Dict: keys county_name, state_name, confirmed, new, death,
        new_death, fatality_rate, latitude, longitude, last_update.
    :raises IndexError: if ``zipcodes.matching`` returns an empty list;
        anything it raises itself propagates unchanged.
    :raises DataReadingError: if either CSV cannot be read.
    :raises DataValidationError: if the state/county pair cannot be resolved
        to exactly one usable row in each file.
    """
    zip_info = zipcodes.matching(str(zipcode))[0]
    # Drop the last space-separated word of the county field
    # (presumably a "County" suffix - confirm against the zipcodes data).
    county = zip_info["county"].rsplit(" ", 1)[0]
    state = zip_info["state"]

    try:
        deaths = pd.read_csv(app_config.STATE_DEATH)
        confirmed_df = pd.read_csv(app_config.STATE_CONFIRMED)
    except Exception as ex:
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from ex

    try:
        state_name = reverse_states_map[state]

        confirmed_df = confirmed_df[
            confirmed_df["Province_State"] == state_name]
        confirmed_df = confirmed_df[confirmed_df["Admin2"] == county]
        # .item() requires exactly one matching row, like int(Series) did,
        # without relying on the deprecated Series -> int conversion.
        confirmed = int(confirmed_df.iloc[:, -1].item())
        # NOTE(review): presumably columns 12+ are the per-date columns.
        new_confirmed = (
            confirmed_df.iloc[:, 12:]
            .astype("int32")
            .diff(axis=1)
            .iloc[:, -1]
            .values[0]
        )

        # used data source 2 for new death number
        deaths = deaths[deaths["Province_State"] == state_name]
        deaths = deaths[deaths["Admin2"] == county]
        death = int(deaths.iloc[:, -1].item())
        # 4/15/20: force cast into int before diff as pd sometimes read as
        # float and throws nan.
        new_death = (
            deaths.iloc[:, 12:]
            .astype("int32")
            .diff(axis=1)
            .iloc[:, -1]
            .values[0]
        )

        # Zero confirmed cases would divide by zero; report 0 instead.
        # NOTE(review): this is a ratio, yet it is rendered with a "%" sign
        # below - confirm whether it should be multiplied by 100.
        fatality_rate = death / confirmed if confirmed else 0

        data = {
            "county_name": county,
            "state_name": state_name,
            "confirmed": confirmed,
            "new": int(new_confirmed),
            "death": death,
            "new_death": int(new_death),
            "fatality_rate": f"{fatality_rate}%",
            "latitude": float(zip_info["lat"]),
            "longitude": float(zip_info["long"]),
            "last_update": str("2020-04-17 19:50 EDT"),
        }
    except Exception as ex:
        raise DataValidationError(
            f"Can't find State: {state}, and County: {county} combination."
        ) from ex
    return data
Ejemplo n.º 5
0
def read_states(state: str) -> Dict:
    """Read date, confirmed, and death info of a state.

    Loads ``app_config.NYT_STATE``, keeps the rows whose ``state`` column
    equals the mapped state name, and returns the ``date``/``cases``/
    ``deaths`` columns relabelled as ``Date``/``Confirmed``/``Deaths`` with
    missing values replaced by 0.

    :param: :state: :str: key of ``reverse_states_map``.
    :return: result of ``DataFrame.to_dict(orient="records")``, i.e. a list
        of per-row dictionaries (the ``Dict`` annotation is kept as-is).
    :raises KeyError: if ``state`` is not in ``reverse_states_map`` (the
        lookup happens before the guarded section).
    :raises DataReadingError: if loading or reshaping the data fails.
    """
    state = reverse_states_map[state]

    try:
        data = pd.read_csv(app_config.NYT_STATE)
        data = data[data['state'] == state]
        # rename() returns a new frame, so the filtered slice is not mutated.
        data = data[['date', 'cases', 'deaths']].rename(
            columns={'date': 'Date', 'cases': 'Confirmed', 'deaths': 'Deaths'})
        data = data.fillna(0)
        dict_data = data.to_dict(orient="records")

        # Release the frame eagerly, as the original code does.
        del data
        gc.collect()
    except Exception as ex:
        raise DataReadingError("error reading data") from ex

    return dict_data
Ejemplo n.º 6
0
def parse_df(metric_type: str) -> pd.DataFrame:
    """Load the Johns Hopkins global time-series CSV for a metric.

    ``metric_type`` is matched by prefix: anything starting with
    "confirmed" selects the confirmed file, anything starting with "death"
    selects the deaths file.

    :param: :str: :metric_type: Currently only confirmed or death supported
    :return: :pd.DataFrame: dataframe of the queried data.
    :raises ValueError: if ``metric_type`` matches neither prefix.
    :raises DataReadingError: if the CSV cannot be downloaded or parsed.
    """
    if metric_type.startswith("confirmed"):
        metric_type = "confirmed"
    elif metric_type.startswith("death"):
        metric_type = "deaths"
    else:
        raise ValueError(f"{metric_type} metric type not supported")

    url = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
        "csse_covid_19_data/csse_covid_19_time_series/"
        f"time_series_covid19_{metric_type}_global.csv"
    )
    try:
        return pd.read_csv(url)
    except Exception as ex:
        # Keep the underlying network/parser error attached as the cause.
        raise DataReadingError("error accessing country data") from ex
Ejemplo n.º 7
0
def test_DataReadingError_with_message():
    """str() of a DataReadingError is the class name followed by its message."""
    message = "with message"
    with pytest.raises(DataReadingError) as excinfo:
        raise DataReadingError(message)

    rendered = str(excinfo.value)
    assert rendered == "DataReadingError with message"
Ejemplo n.º 8
0
def get_daily_state_stats(state: str) -> Dict:
    """Get daily stats for a specific state, including tested, confirmed,
    todays_confirmed, deaths, and todays_deaths. Everything is initialized
    at zero.

    Testing figures come from the JSON served at
    ``app_config.CVTRACK_STATES_URL``; confirmed and death figures are
    summed over the state's rows in the CSV at ``app_config.COUNTY_URL``.

    :params: :str: state. the state to look up.
    :return: :Dict: {"tested": value from the API (0 if unavailable),
                     "todays_tested": difference of the two newest entries,
                     "confirmed": str,
                     "todays_confirmed": str,
                     "deaths": str,
                     "todays_deaths": str}
    :raises DataReadingError: if the API does not answer with status 200,
        or if either data source cannot be read or parsed.
    :raises DataValidationError: if the numbers fail the sanity checks.
    """
    # initialize the variables so it doesnt crash if both api call failed
    tested, todays_tested, confirmed = 0, 0, 0
    todays_confirmed, deaths, todays_deaths = 0, 0, 0

    # Get tested data
    response = requests.get(url=app_config.CVTRACK_STATES_URL)
    if response.status_code != 200:
        raise DataReadingError("get_daily_state_stats data reading error")

    data = response.json()
    # Anything other than a list is ignored and the zero defaults are kept.
    if isinstance(data, list):
        try:
            data = [d for d in data if d["state"] == state]
            curr, prev = data[0], data[1]
            todays_tested = (curr["totalTestResults"] -
                             prev["totalTestResults"])
            tested = curr["totalTestResults"]
        except (KeyError, IndexError, TypeError) as ex:
            # Missing keys, fewer than two entries for the state, or
            # non-numeric values: report them as a reading error, which is
            # what the original (never-matching) handler intended.
            raise DataReadingError(
                f"error getting tested data {ex}") from ex

    # Get confirmed and deaths data
    try:
        df = pd.read_csv(app_config.COUNTY_URL)
        df = df[df["State Name"] == reverse_states_map[state]]
        grouped = df.groupby(["State Name"])
        confirmed = grouped["Confirmed"].sum().values[0].astype(str)
        todays_confirmed = grouped["New"].sum().values[0].astype(str)
        deaths = grouped["Death"].sum().values[0].astype(str)
        todays_deaths = grouped["New Death"].sum().values[0].astype(str)
    except Exception as ex:
        raise DataReadingError(
            f"get_daily_state_stats parsing error {ex}") from ex

    stats = {
        "tested": tested,
        "todays_tested": todays_tested,
        "confirmed": confirmed,
        "todays_confirmed": todays_confirmed,
        "deaths": deaths,
        "todays_deaths": todays_deaths,
    }

    ###################################################################
    #                     Sanity Check
    ###################################################################
    # A daily increment must be strictly smaller than its running total.
    if int(todays_tested) >= int(tested):
        raise DataValidationError("/stats tested number validation error")

    if int(todays_confirmed) >= int(confirmed):
        raise DataValidationError("/stats confirmed number validation error")

    if (int(confirmed) > int(tested)) or (int(deaths) > int(confirmed)):
        raise DataValidationError("/stats numbers comparison validation error")

    del df, data
    gc.collect()

    return stats
Ejemplo n.º 9
0
def get_daily_state_stats(state: str) -> Dict:
    """Get daily stats for a specific state, including tested, confirmed,
    todays_confirmed, deaths, and todays_deaths. Everything is initialized
    at zero.

    Testing figures and today's deaths come from the JSON served at
    ``app_config.CVTRACK_STATES_URL + "/daily?state=<state>"``; confirmed
    and death totals are summed over the state's rows in the CSV at
    ``app_config.COUNTY_URL``.

    :params: :str: state. the state to look up.
    :return: :Dict: {"tested": value from the API (0 if unavailable),
                     "todays_tested": difference of the two newest entries,
                     "confirmed": str,
                     "todays_confirmed": str,
                     "deaths": str,
                     "todays_deaths": value of "deathIncrease" from the API}
    :raises DataReadingError: if the API does not answer with status 200 or
        its list payload cannot be parsed.
    :raises DataValidationError: if the numbers fail the sanity checks.
    """
    # initialize the variables so it doesnt crash if both api call failed
    tested, todays_tested, confirmed = 0, 0, 0
    todays_confirmed, deaths, todays_deaths = 0, 0, 0

    URL = app_config.CVTRACK_STATES_URL + f"/daily?state={state}"
    response = requests.get(url=URL)

    if response.status_code != 200:
        raise DataReadingError("get_daily_state_stats data reading error")

    # Parse the body once; binding it here also guarantees `data` exists
    # for the cleanup at the end, whatever shape the payload has.
    data = response.json()

    # covidtracking api throws error json if request error {'error': }
    if isinstance(data, list):
        try:
            curr, prev = data[0], data[1]
            todays_tested = (curr["totalTestResults"] -
                             prev["totalTestResults"])
            tested = curr["totalTestResults"]
            todays_deaths = curr["deathIncrease"]
        except Exception as ex:
            raise DataReadingError(
                "get_daily_state_stats parsing error") from ex

    df = pd.read_csv(app_config.COUNTY_URL)
    df = df[df["State Name"] == reverse_states_map[state]]
    grouped = df.groupby(["State Name"])
    confirmed = grouped["Confirmed"].sum().values[0].astype(str)
    todays_confirmed = grouped["New"].sum().values[0].astype(str)
    deaths = grouped["Death"].sum().values[0].astype(str)

    stats = {
        "tested": tested,
        "todays_tested": todays_tested,
        "confirmed": confirmed,
        "todays_confirmed": todays_confirmed,
        "deaths": deaths,
        "todays_deaths": todays_deaths,
    }

    ###################################################################
    #                     Sanity Check
    ###################################################################
    # A daily increment must be strictly smaller than its running total.
    if ((int(todays_tested) >= int(tested))
            or (int(todays_confirmed) >= int(confirmed))):
        # not checking for todays_deaths >= deaths
        raise DataValidationError("stats.py numbers doesn't make sense")

    if (int(confirmed) > int(tested)) or (int(deaths) > int(confirmed)):
        raise DataValidationError("stats.py numbers doesnt make sense")

    del df, data
    gc.collect()

    return stats