def read_county_stats(state: str, county: str) -> Dict:
    """Return the stats record(s) for one county of one state.

    Two CSV sources are read: ``app_config.COUNTY_URL`` (the per-county
    stats table) and ``app_config.STATE_DEATH`` (used only to derive the
    most recent day-over-day death delta, which overwrites ``new_death``
    on the first matching county row).

    :param state: key into ``reverse_states_map`` (presumably the two-letter
        state abbreviation -- confirm against callers).
    :param county: county name, matched exactly against ``county_name`` in
        the county table and ``Admin2`` in the deaths table.
    :return: result of ``DataFrame.to_dict(orient="records")``, i.e. a list
        of row dicts (despite the ``Dict`` annotation).
    :raises DataReadingError: if either CSV cannot be read.
    :raises DataValidationError: if the state/county combination cannot be
        resolved in either source.
    """
    try:
        df = pd.read_csv(app_config.COUNTY_URL)
        deaths = pd.read_csv(app_config.STATE_DEATH)
    except Exception as ex:
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from ex

    try:
        # Normalise headers: "State Name" -> "state_name".
        df.columns = df.columns.str.lower().str.replace(" ", "_")
        state_name = reverse_states_map[state]

        # used data source 2 for new death number
        deaths = deaths[deaths["Province_State"] == state_name]
        deaths = deaths[deaths["Admin2"] == county]
        # Columns from position 12 onward are diffed along the row; the last
        # delta is taken as the new-death count.
        new_death = deaths.iloc[:, 12:].diff(axis=1).iloc[:, -1].values[0]

        df = df[df["state_name"] == state_name]
        # .copy() so the write below lands on our own frame instead of a
        # chained-assignment temporary of the filtered slice.
        df = df[df["county_name"] == county].copy()
        # Raises IndexError when no row matched, which is reported below as
        # an unknown state/county combination.
        df.iloc[0, df.columns.get_loc("new_death")] = new_death

        records = df.to_dict(orient="records")
    except Exception as ex:
        raise DataValidationError(
            f"Can't find State: {state}, and County: {county} combination."
        ) from ex

    return records
def read_county_stats(state: str, county: str) -> Dict:
    """Return the stats record(s) for one county (or territory) of a state.

    :param state: key into ``reverse_states_map`` (presumably the two-letter
        abbreviation -- confirm against callers).
    :param county: county name matched exactly against ``county_name``.
        Some counties are reported jointly and are remapped below.
    :return: result of ``DataFrame.to_dict(orient="records")``, i.e. a list
        of row dicts (despite the ``Dict`` annotation).
    :raises DataReadingError: if the county data cannot be ingested, or the
        state cannot be resolved / has no rows.
    :raises DataValidationError: if the county lookup fails or has no rows.
    """
    try:
        df = ingest_county_data(url=app_config.COUNTY_URL)
    except Exception as ex:
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from ex

    # 2020-04-22 patch counties: these pairs are looked up under a single
    # combined name.
    if state == "WA" and county in ("Benton", "Franklin"):
        county = "Benton and Franklin"
    if state == "MA" and county in ("Dukes", "Nantucket"):
        county = "Dukes and Nantucket"

    # 2020-04-26 patch territories and districts
    territories = ("DC", "GU", "AS", "PR", "MP")

    # Fetch state data. full_state_name falls back to the raw input so the
    # error message is still meaningful when the map lookup itself fails.
    full_state_name = state
    try:
        full_state_name = reverse_states_map[state]
        df = df[df["state_name"] == full_state_name]
    except Exception as ex:
        raise DataReadingError(
            f"Can't find {full_state_name} in our database.") from ex
    if len(df) == 0:
        raise DataReadingError(
            f"Can't find {full_state_name} in our database.")

    # Now fetch county data.
    try:
        if state in territories:
            # Territories are not filtered by county: the first row is
            # relabelled with the territory's full name and every row for
            # the territory is returned.
            df = df.reset_index(drop=True)
            df.loc[0, "county_name"] = full_state_name
        else:
            df = df[df["county_name"] == county]
    except Exception as ex:
        raise DataValidationError(
            f"Can't find State: {full_state_name},"
            f" and County: {county} combination.") from ex
    if len(df) == 0:
        raise DataValidationError(
            f"Can't find State: {full_state_name},"
            f" and County: {county} combination.")

    return df.to_dict(orient="records")
def read_county_stats_zip_ny(zipcode: str) -> Dict:
    """Return stats for New York State zip codes.

    The zip code is resolved to a county/state through ``zipcodes.matching``;
    confirmed and death counts are then read from the
    ``app_config.STATE_CONFIRMED`` and ``app_config.STATE_DEATH`` CSVs.

    :param zipcode: zip code to look up (coerced with ``str``).
    :return: dict with keys ``county_name``, ``state_name``, ``confirmed``,
        ``new``, ``death``, ``new_death``, ``fatality_rate``, ``latitude``,
        ``longitude`` and ``last_update``.
    :raises IndexError: if ``zipcodes.matching`` returns no match (the lookup
        is not wrapped, as before).
    :raises DataReadingError: if either CSV cannot be read.
    :raises DataValidationError: if the state/county combination cannot be
        resolved in the data.
    """
    zip_info = zipcodes.matching(str(zipcode))[0]
    # Drop the last space-separated word of the county field
    # (presumably a "County" suffix -- confirm against the zipcodes data).
    county = zip_info["county"].rsplit(" ", 1)[0]
    state = zip_info["state"]

    try:
        deaths = pd.read_csv(app_config.STATE_DEATH)
        confirmed_df = pd.read_csv(app_config.STATE_CONFIRMED)
    except Exception as ex:
        raise DataReadingError(
            f"Data reading error State: {state}, and County: {county}."
        ) from ex

    try:
        state_name = reverse_states_map[state]

        confirmed_df = confirmed_df[
            confirmed_df["Province_State"] == state_name]
        confirmed_df = confirmed_df[confirmed_df["Admin2"] == county]
        # Take the scalar from the first matching row; int() on a whole
        # Series is deprecated in pandas. Raises IndexError (reported as a
        # validation error below) when nothing matched.
        confirmed = int(confirmed_df.iloc[0, -1])
        new_confirmed = (confirmed_df.iloc[:, 12:].astype("int32").diff(
            axis=1).iloc[:, -1].values[0])

        # used data source 2 for new death number
        deaths = deaths[deaths["Province_State"] == state_name]
        deaths = deaths[deaths["Admin2"] == county]
        death = int(deaths.iloc[0, -1])
        # 4/15/20: force cast into int before diff as pd sometimes read as
        # float and throws nan.
        new_death = (deaths.iloc[:, 12:].astype("int32").diff(
            axis=1).iloc[:, -1].values[0])

        try:
            fatality_rate = death / confirmed
        except ZeroDivisionError:
            fatality_rate = 0

        data = {
            "county_name": county,
            "state_name": state_name,
            "confirmed": confirmed,
            "new": int(new_confirmed),
            "death": death,
            "new_death": int(new_death),
            # NOTE(review): this is the raw death/confirmed ratio with a "%"
            # appended, not multiplied by 100 -- confirm intended format.
            "fatality_rate": f"{fatality_rate}%",
            "latitude": float(zip_info["lat"]),
            "longitude": float(zip_info["long"]),
            # Hard-coded timestamp, kept as in the original.
            "last_update": str("2020-04-17 19:50 EDT"),
        }
    except Exception as ex:
        raise DataValidationError(
            f"Can't find State: {state}, and County: {county} combination."
        ) from ex

    return data
def test_DataValidationError_with_message():
    """A DataValidationError built with a message stringifies with its
    class-name prefix followed by that message."""
    expected_text = "DataValidationError with message"

    with pytest.raises(DataValidationError) as caught:
        raise DataValidationError("with message")

    actual_text = str(caught.value)
    assert actual_text == expected_text
def get_daily_state_stats(state: str) -> Dict:
    """Get daily stats for a specific state, including tested, confirmed,
    todays_confirmed, deaths, and todays_deaths.

    Testing numbers come from the JSON at ``app_config.CVTRACK_STATES_URL``;
    confirmed/death numbers are summed from the CSV at
    ``app_config.COUNTY_URL``. Everything is initialized at zero.

    :param state: the state to look up; matched against the ``state`` field
        of the testing feed and used as a key into ``reverse_states_map``.
    :return: dict with keys ``tested``, ``todays_tested``, ``confirmed``,
        ``todays_confirmed``, ``deaths`` and ``todays_deaths``. The
        confirmed/death values are strings (``.astype(str)``); the tested
        values are whatever the feed supplies.
    :raises DataReadingError: on a non-200 response, or when either source
        cannot be read or parsed.
    :raises DataValidationError: when the numbers fail the sanity checks.
    """
    # initialize the variables so it doesnt crash if both api call failed
    tested, todays_tested, confirmed = 0, 0, 0
    todays_confirmed, deaths, todays_deaths = 0, 0, 0

    # Get tested data
    response = requests.get(url=app_config.CVTRACK_STATES_URL)
    if response.status_code != 200:
        raise DataReadingError("get_daily_state_stats data reading error")

    data = response.json()
    # A non-list payload leaves tested/todays_tested at their zero defaults.
    if isinstance(data, list):
        try:
            data = [d for d in data if d["state"] == state]
            # The first two matching entries are treated as current and
            # previous (assumes the feed is ordered newest first -- confirm).
            curr = data[0]
            prev = data[1]
            todays_tested = (curr["totalTestResults"] -
                             prev["totalTestResults"])
            tested = curr["totalTestResults"]
        except (KeyError, IndexError, TypeError) as ex:
            # Previously this handler caught DataReadingError, which the
            # lookups above never raise, so it could not fire.
            raise DataReadingError(
                f"error getting tested data {ex}") from ex

    # Get confirmed and deaths data
    try:
        df = pd.read_csv(app_config.COUNTY_URL)
        df = df[df["State Name"] == reverse_states_map[state]]
        grouped = df.groupby(["State Name"])
        confirmed = grouped["Confirmed"].sum().values[0].astype(str)
        todays_confirmed = grouped["New"].sum().values[0].astype(str)
        deaths = grouped["Death"].sum().values[0].astype(str)
        todays_deaths = grouped["New Death"].sum().values[0].astype(str)
    except Exception as ex:
        raise DataReadingError(
            f"get_daily_state_stats parsing error {ex}") from ex

    stats = {
        "tested": tested,
        "todays_tested": todays_tested,
        "confirmed": confirmed,
        "todays_confirmed": todays_confirmed,
        "deaths": deaths,
        "todays_deaths": todays_deaths,
    }

    ###################################################################
    #                     Sanity Check
    ###################################################################
    # A daily increment can never reach the running total; note this also
    # rejects the all-zero defaults.
    if int(todays_tested) >= int(tested):
        raise DataValidationError("/stats tested number validation error")
    if int(todays_confirmed) >= int(confirmed):
        raise DataValidationError("/stats confirmed number validation error")
    if (int(confirmed) > int(tested)) or (int(deaths) > int(confirmed)):
        raise DataValidationError("/stats numbers comparison validation error")

    del df, data
    gc.collect()
    return stats
def get_daily_stats() -> Dict:
    """Get aggregate daily stats: tested, confirmed, todays_confirmed,
    deaths, and todays_deaths.

    Two sources are queried in turn. ``app_config.TMP_URL`` supplies
    confirmed/death numbers; ``app_config.CVTRACK_URL`` then supplies
    testing numbers and, when it succeeds, overwrites the confirmed/death
    numbers as well. A failure in either source is logged and replaced by
    zeros rather than raised. Everything is initialized at zero.

    :return: dict with keys ``tested``, ``todays_tested``, ``confirmed``,
        ``todays_confirmed``, ``deaths`` and ``todays_deaths``.
    :raises DataValidationError: when the numbers fail the sanity checks
        (this includes the all-zero fallback).
    """
    # initialize the variables so it doesnt crash if both api call failed
    tested, todays_tested, confirmed = 0, 0, 0
    todays_confirmed, deaths, todays_deaths = 0, 0, 0

    # Failures a fetch-and-parse can realistically produce: transport
    # errors, invalid JSON (ValueError) and unexpected payload shapes.
    # The handlers previously caught DataReadingError, which none of this
    # code raises, so the logged fallback could never run.
    fetch_errors = (requests.RequestException, ValueError, KeyError,
                    IndexError, TypeError)

    try:
        data2 = requests.get(url=app_config.TMP_URL).json()
        confirmed = data2["cases"]
        todays_confirmed = data2["todayCases"]
        deaths = data2["deaths"]
        todays_deaths = data2["todayDeaths"]
        del data2
        gc.collect()
    except fetch_errors as ex:
        _logger.error("stats.get_daily_stats %s", ex)
        confirmed, todays_confirmed, deaths, todays_deaths = 0, 0, 0, 0

    try:
        # covidtracking api
        data = requests.get(url=app_config.CVTRACK_URL).json()
        # Entries 0 and 1 are treated as current and previous
        # (assumes newest-first ordering -- confirm against the feed).
        curr = data[0]
        prev = data[1]
        tested = curr["posNeg"]
        todays_tested = curr["totalTestResults"] - prev["totalTestResults"]
        confirmed = curr["positive"]
        todays_confirmed = curr["positive"] - prev["positive"]
        deaths = curr["death"]
        todays_deaths = curr["death"] - prev["death"]
        del data
        gc.collect()
    except fetch_errors as ex:
        _logger.error("stats.get_daily_stats %s", ex)
        tested = 0

    stats = {
        "tested": tested,
        "todays_tested": todays_tested,
        "confirmed": confirmed,
        "todays_confirmed": todays_confirmed,
        "deaths": deaths,
        "todays_deaths": todays_deaths,
    }

    ###################################################################
    #                     Sanity Check
    ###################################################################
    if int(todays_tested) >= int(tested):
        raise DataValidationError("get /stats tested validation error")
    if int(todays_confirmed) >= int(confirmed):
        raise DataValidationError("get /stats confirmed validation error")
    if (int(confirmed) > int(tested)) or (int(deaths) > int(confirmed)):
        raise DataValidationError("get /stats comparison validation error")

    return stats
def get_daily_state_stats(state: str) -> Dict:
    """Get daily stats for a specific state, including tested, confirmed,
    todays_confirmed, deaths, and todays_deaths.

    Testing numbers and today's deaths come from
    ``app_config.CVTRACK_STATES_URL + "/daily?state=<state>"``; confirmed
    and total-death numbers are summed from the CSV at
    ``app_config.COUNTY_URL``. Everything is initialized at zero.

    :param state: the state to look up; sent as the ``state`` query
        parameter and used as a key into ``reverse_states_map``.
    :return: dict with keys ``tested``, ``todays_tested``, ``confirmed``,
        ``todays_confirmed``, ``deaths`` and ``todays_deaths``. ``confirmed``,
        ``todays_confirmed`` and ``deaths`` are strings (``.astype(str)``);
        the others are whatever the feed supplies.
    :raises DataReadingError: on a non-200 response or when the testing
        payload cannot be parsed.
    :raises DataValidationError: when the numbers fail the sanity checks.
    """
    # initialize the variables so it doesnt crash if both api call failed
    tested, todays_tested, confirmed = 0, 0, 0
    todays_confirmed, deaths, todays_deaths = 0, 0, 0

    URL = app_config.CVTRACK_STATES_URL + f"/daily?state={state}"
    response = requests.get(url=URL)
    if response.status_code != 200:
        raise DataReadingError("get_daily_state_stats data reading error")

    # Parse the body once instead of once for the type check and again for
    # the data.
    data = response.json()
    # covidtracking api throws error json if request error {'error': }
    # -- in that case the testing numbers stay at their zero defaults.
    if isinstance(data, list):
        try:
            # Entries 0 and 1 are treated as current and previous
            # (assumes newest-first ordering -- confirm against the feed).
            curr = data[0]
            prev = data[1]
            todays_tested = (curr["totalTestResults"] -
                             prev["totalTestResults"])
            tested = curr["totalTestResults"]
            todays_deaths = curr["deathIncrease"]
        except Exception as ex:
            raise DataReadingError(
                "get_daily_state_stats parsing error") from ex

    df = pd.read_csv(app_config.COUNTY_URL)
    df = df[df["State Name"] == reverse_states_map[state]]
    grouped = df.groupby(["State Name"])
    confirmed = grouped["Confirmed"].sum().values[0].astype(str)
    todays_confirmed = grouped["New"].sum().values[0].astype(str)
    deaths = grouped["Death"].sum().values[0].astype(str)

    stats = {
        "tested": tested,
        "todays_tested": todays_tested,
        "confirmed": confirmed,
        "todays_confirmed": todays_confirmed,
        "deaths": deaths,
        "todays_deaths": todays_deaths,
    }

    ###################################################################
    #                     Sanity Check
    ###################################################################
    if ((int(todays_tested) >= int(tested)) or
            (int(todays_confirmed) >= int(confirmed))):
        # not checking for todays_deaths >= deaths
        raise DataValidationError("stats.py numbers doesn't make sense")
    if (int(confirmed) > int(tested)) or (int(deaths) > int(confirmed)):
        raise DataValidationError("stats.py numbers doesnt make sense")

    del df, data
    gc.collect()
    return stats