def gender_age_cases():
    """Fetch Polish and Czech covid-19 deaths counted per date, age and sex.

    Both sources are reduced to one row per (date, age, sex) combination
    with a ``deaths`` count, tagged with a ``country`` code ('PL' / 'CZ'),
    restricted to a common date window and stacked into one frame.

    Returns:
        The concatenated PL and CZ frames (PL rows first); the original
        per-frame index labels are kept.
    """
    def _count_by_group(records, code):
        """Count records per (date, age, sex) and label them with ``code``."""
        grouped = records.groupby(['date', 'age', 'sex'])
        counts = grouped.size().reset_index(name='deaths')
        counts['country'] = code
        return counts

    # Poland first, then Czechia
    pl = _count_by_group(covid19poland.covid_death_cases(from_github=True), 'PL')
    cz = _count_by_group(_src.czechia(), 'CZ')

    # The window opens at the earliest date seen in either source and closes
    # at the earlier of the two last dates.
    first_day = min(cz.date.min(), pl.date.min())
    last_day = min(cz.date.max(), pl.date.max())
    window = pd.date_range(start=first_day, end=last_day)

    # keep only rows inside the window, then stack
    trimmed = [frame[frame.date.isin(window)] for frame in (pl, cz)]
    return pd.concat(trimmed)
def country_deaths():
    """Fetch daily covid-19 death counts per country for PL, CZ and SE.

    Polish and Czech records are counted per date; the Swedish data from
    ``sweden_resample()`` is summed per date. All three are restricted to a
    common date window, stacked, and every (date, country) pair without a
    row inside the window is added with ``deaths == 0``.

    Returns:
        DataFrame with columns ``date``, ``deaths`` and ``country``, sorted
        by date and country, with a fresh 0..n-1 index.
    """
    def _region_to_country(x):
        """Count the rows of ``x`` per date into a ``deaths`` column."""
        return x.groupby(['date']).size().reset_index(name='deaths')

    # Poland
    pl = covid19poland.covid_death_cases(from_github=True)
    pl = _region_to_country(pl)
    pl['country'] = 'PL'
    # Czechia
    cz = _src.czechia()
    cz = _region_to_country(cz)
    cz['country'] = 'CZ'
    # Sweden
    se = sweden_resample()
    se = se[['date', 'deaths']].groupby(['date']).sum().reset_index()
    se['country'] = 'SE'

    # match same dates: the window opens at the earliest date of any source
    # (earlier gaps are zero-filled below) and closes at the earliest of the
    # per-source last dates.
    dt_range = pd.date_range(
        start=min(cz.date.min(), pl.date.min(), se.date.min()),
        end=min(cz.date.max(), pl.date.max(), se.date.max()),
    )
    cz = cz[cz.date.isin(dt_range)]
    pl = pl[pl.date.isin(dt_range)]
    se = se[se.date.isin(dt_range)]

    # merge
    df = pd.concat([pl, cz, se])

    # fill missing: one set lookup per (date, country) pair instead of
    # filtering the whole frame for each pair.
    present = set(zip(df.date, df.country))
    countries = df.country.unique()
    absent = [
        (dt, c)
        for dt in dt_range
        for c in countries
        if (dt, c) not in present
    ]
    # Only concatenate when there is something to add; appending an empty
    # frame can change the column dtypes of the result.
    if absent:
        filler = pd.DataFrame({
            'date': [dt for dt, _ in absent],
            'country': [c for _, c in absent],
            'deaths': 0,
        })
        df = pd.concat([df, filler], ignore_index=True)

    # return
    return df.sort_values(['date', 'country']).reset_index(drop=True)
def test_twitter_deaths(self):
    """Compare per-day death counts from case records with covid19dh data.

    Counts the rows of ``PL.covid_death_cases()`` per date, takes the
    day-over-day difference of the ``deaths`` column that covid19dh reports
    for Poland, joins both on ``date`` and keeps the dates where the two
    numbers differ.
    """
    # get data
    cases = PL.covid_death_cases()
    x = cases.groupby(["date"]).size().reset_index(name='case_agg')
    # reference
    ref, _ = covid19dh.covid19("Poland", verbose=False)
    # Explicit copy: assigning a column on a bare column selection of
    # another frame raises pandas' SettingWithCopyWarning.
    ref = ref[["date", "deaths"]].copy()
    # presumably the reference series is cumulative, hence the diff -- confirm
    ref['deaths'] = ref.deaths.diff()
    # merge
    x = x.merge(ref, on="date")
    # parse not matching
    x = x[x.case_agg != x.deaths]
    # NOTE(review): the mismatching rows are computed but nothing is asserted
    # on them, so this test can only fail if one of the calls above raises.
def get_covid_death_cases(self, *args, **kw):
    """Call ``PL.covid_death_cases`` and validate the shape of its result.

    All positional and keyword arguments are forwarded unchanged.

    Returns:
        The object returned by ``PL.covid_death_cases`` after asserting that
        it is a ``pd.DataFrame`` containing every required column.
    """
    # Checked in this order; the first missing column fails the test.
    required_columns = (
        "date", "age", "sex", "place",
        "NUTS2", "NUTS3", "comorbid", "serious",
    )
    x = PL.covid_death_cases(*args, **kw)
    self.assertIsInstance(x, pd.DataFrame)
    for column in required_columns:
        self.assertIn(column, x.columns)
    return x
def get_data():
    """Get per-death covid-19 records for PL, CZ and IT.

    Polish and Czech records are counted per (date, age, sex), restricted to
    a common date window and expanded again so that each counted death is
    one row. The Italian frame is appended afterwards without date
    filtering or expansion.

    NOTE(review): Sweden is not fetched by this function.

    Returns:
        DataFrame without the ``deaths`` column and with ``age`` converted
        to float; index labels are not reset after the Italian rows are
        appended.
    """
    def _region_to_country(x):
        """Aggregate region data to country data."""
        return x.groupby(['date', 'age', 'sex']).size().reset_index(name='deaths')

    # Poland
    pl = covid19poland.covid_death_cases(from_github=True)
    pl = _region_to_country(pl)
    pl['country'] = 'PL'
    # Czechia
    cz = covid19czechia.covid_deaths(level=3, usecache=True)
    cz = _region_to_country(cz)
    cz['country'] = 'CZ'
    # Italy
    it = covid19italy.covid_deaths()
    it['country'] = 'IT'

    # match same dates (window is derived from PL and CZ only)
    dt_range = pd.date_range(
        start=min(cz.date.min(), pl.date.min()),
        end=min(cz.date.max(), pl.date.max()),
    )
    cz = cz[cz.date.isin(dt_range)]
    pl = pl[pl.date.isin(dt_range)]

    # merge; a fresh index is required so the label-based repeat below
    # addresses each row exactly once.
    df = pd.concat([pl, cz], ignore_index=True)
    # reindex to people: repeat every row ``deaths`` times. Repeating index
    # labels keeps each column's dtype, whereas going through ``df.values``
    # would turn every column into ``object``.
    df = df.loc[df.index.repeat(df.deaths.astype(int))].reset_index(drop=True)

    # merge
    df = pd.concat([df, it])
    # drop deaths column
    df = df.drop(['deaths'], axis=1)
    df['age'] = df.age.apply(float)
    # return
    return df