Esempio n. 1
0
def gender_age_cases():
    """Fetch covid-19 death counts by date, age and sex for Poland and Czechia.

    Returns:
        DataFrame with the columns ``date``, ``age``, ``sex``, ``deaths`` and
        ``country`` ('PL' or 'CZ'), Polish rows first.
    """
    def _count_deaths(records):
        """Count the rows of `records` per (date, age, sex) as ``deaths``."""
        grouped = records.groupby(['date', 'age', 'sex'])
        return grouped.size().reset_index(name='deaths')

    # Poland
    poland = _count_deaths(covid19poland.covid_death_cases(from_github=True))
    poland['country'] = 'PL'
    # Czechia
    czechia = _count_deaths(_src.czechia())
    czechia['country'] = 'CZ'

    # Shared date window: from the earliest first date of either country up
    # to the earlier of the two last dates.
    # NOTE(review): because the start is the overall minimum, only the end of
    # the window actually trims rows -- confirm this is intended.
    first_day = min(czechia.date.min(), poland.date.min())
    last_day = min(czechia.date.max(), poland.date.max())
    window = pd.date_range(start=first_day, end=last_day)

    # keep only rows inside the window and stack both countries
    in_window = [frame[frame.date.isin(window)] for frame in (poland, czechia)]
    return pd.concat(in_window)
Esempio n. 2
0
def country_deaths():
    """Fetch daily covid-19 death counts per country for PL, CZ and SE.

    Polish and Czech counts are obtained by counting the rows of the fetched
    frames per date; Swedish counts are the per-date sum of the ``deaths``
    column returned by ``sweden_resample()``. All three series are cut to a
    shared date window and every (date, country) pair without a row inside
    that window is added with ``deaths == 0``.

    Returns:
        DataFrame with the columns ``date``, ``deaths`` and ``country``,
        sorted by date and country, with a fresh 0..n-1 index.
    """
    def _region_to_country(x):
        """Count the rows of `x` per date and name the count ``deaths``."""
        return x\
            .groupby(['date'])\
            .size()\
            .reset_index(name='deaths')

    # Poland
    pl = covid19poland.covid_death_cases(from_github=True)
    pl = _region_to_country(pl)
    pl['country'] = 'PL'
    # Czechia
    cz = _src.czechia()
    cz = _region_to_country(cz)
    cz['country'] = 'CZ'
    # Sweden
    se = sweden_resample()
    se = se[['date', 'deaths']]\
        .groupby(['date'])\
        .sum()\
        .reset_index()
    se['country'] = 'SE'

    # match same dates: start at the earliest first date (earlier gaps are
    # zero-filled below), stop at the earliest last date of the three
    dt_range = pd.date_range(start=min(cz.date.min(), pl.date.min(),
                                       se.date.min()),
                             end=min(cz.date.max(), pl.date.max(),
                                     se.date.max()))
    cz = cz[cz.date.isin(dt_range)]
    pl = pl[pl.date.isin(dt_range)]
    se = se[se.date.isin(dt_range)]
    # merge
    df = pd.concat([pl, cz, se])

    # fill missing: collect the existing (date, country) pairs once so each
    # lookup is a set membership test instead of a scan over the whole frame
    present = set(zip(df.date, df.country))
    countries = df.country.unique()
    missing = [(dt, c)
               for dt in dt_range
               for c in countries
               if (dt, c) not in present]
    missing_rows = {'date': [dt for dt, _ in missing],
                    'country': [c for _, c in missing],
                    'deaths': [0] * len(missing)}
    df = pd.concat([df, pd.DataFrame(missing_rows)], ignore_index=True)

    # return
    df = df\
        .sort_values(['date', 'country'])\
        .reset_index(drop=True)
    return df
 def test_twitter_deaths(self):
     """Compare per-date death-case counts with the covid19dh data for Poland.

     NOTE(review): the mismatching rows are computed but no assertion on
     them is visible in this block -- confirm whether one is missing.
     """
     # number of individual death records per date
     death_cases = PL.covid_death_cases()
     daily = death_cases.groupby(["date"]).size().reset_index(name='case_agg')

     # reference data, with `deaths` replaced by its row-to-row difference
     reference, _ = covid19dh.covid19("Poland", verbose=False)
     reference = reference[["date", "deaths"]]
     reference['deaths'] = reference.deaths.diff()

     # join both sources on the date
     merged = daily.merge(reference, on="date")

     # rows where the two sources disagree
     mismatched = merged[merged.case_agg != merged.deaths]
Esempio n. 4
0
    def get_covid_death_cases(self, *args, **kw):
        """Fetch the death cases and check the basic shape of the result.

        All positional and keyword arguments are forwarded unchanged to
        ``PL.covid_death_cases``.

        Returns:
            The fetched DataFrame, after asserting that it is a
            ``pd.DataFrame`` and contains every expected column.
        """
        x = PL.covid_death_cases(*args, **kw)

        self.assertIsInstance(x, pd.DataFrame)
        # Checked in this order; the first missing column fails the test.
        expected_columns = (
            "date", "age", "sex", "place",
            "NUTS2", "NUTS3", "comorbid", "serious",
        )
        for column in expected_columns:
            self.assertIn(column, x.columns)

        return x
Esempio n. 5
0
def get_data():
    """Build a one-row-per-death table from the PL, CZ and IT sources.

    Polish and Czech records are counted per (date, age, sex), cut to a
    shared date window and then expanded so that each counted death becomes
    one row. The Italian frame is appended as returned by
    ``covid19italy.covid_deaths()`` (its layout is not visible here --
    presumably compatible; verify against that package).

    Returns:
        DataFrame without the ``deaths`` column and with ``age`` converted
        to float.
    """
    def _count_deaths(records):
        """Count the rows of `records` per (date, age, sex) as ``deaths``."""
        grouped = records.groupby(['date', 'age', 'sex'])
        return grouped.size().reset_index(name='deaths')

    # Poland
    poland = _count_deaths(covid19poland.covid_death_cases(from_github=True))
    poland['country'] = 'PL'
    # Czechia
    czechia = _count_deaths(covid19czechia.covid_deaths(level=3, usecache=True))
    czechia['country'] = 'CZ'
    # Italy
    italy = covid19italy.covid_deaths()
    italy['country'] = 'IT'

    # Shared date window of the PL and CZ data: earliest first date up to
    # the earlier of the two last dates.
    first_day = min(czechia.date.min(), poland.date.min())
    last_day = min(czechia.date.max(), poland.date.max())
    window = pd.date_range(start=first_day, end=last_day)
    czechia = czechia[czechia.date.isin(window)]
    poland = poland[poland.date.isin(window)]

    counted = pd.concat([poland, czechia])

    # Expand the counts: repeat every row `deaths` times so that one row
    # stands for one person.
    repeats = counted.deaths.apply(int)
    persons = pd.DataFrame(counted.values.repeat(repeats, axis=0),
                           columns=counted.columns)

    # append Italy, then drop the count column
    result = pd.concat([persons, italy])
    result = result.drop(['deaths'], axis=1)
    result['age'] = result.age.apply(float)
    return result