def main():
    # Load data
    df = pd.read_csv(DATA_URL)

    # Check 1
    cols = ["datum", "vakcina", "kraj_nuts_kod", "kraj_nazev", "vekova_skupina", "prvnich_davek", "druhych_davek", "celkem_davek"]
    if not all([col in df.columns for col in cols]):
        raise Exception("API changed")

    # Column renaming
    df = df.rename(columns={
        "datum": "date",
        "kraj_nazev": "region",
        "prvnich_davek": "people_vaccinated",
        "druhych_davek": "people_fully_vaccinated",
        "celkem_davek": "total_vaccinations"
    })

    # Add counts per day
    df = df.groupby(by=["date", "region"]).agg(
        people_vaccinated=("people_vaccinated", sum),
        people_fully_vaccinated=("people_fully_vaccinated", sum),
        total_vaccinations=("total_vaccinations", sum)
    ).reset_index()

    # Check 2
    if not (df["total_vaccinations"] == df["people_vaccinated"] + df["people_fully_vaccinated"]).all():
        raise Exception("Error in columns. dose_1 + dose_2 != total_doses")

    # Rename regions
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)
    df.loc[:, "location"] = COUNTRY

    # ISO
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Compute cumsums
    df = df.sort_values(by="date")
    df.loc[:, "total_vaccinations"] = df.groupby("region")["total_vaccinations"].cumsum().values
    df.loc[:, "people_vaccinated"] = df.groupby("region")["people_vaccinated"].cumsum().values
    df.loc[:, "people_fully_vaccinated"] = df.groupby("region")["people_fully_vaccinated"].cumsum().values

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE
    )
Example #2
0
 def _postprocess(self, df):
     df = ISODB().merge(df, country_iso=self.country_iso, mode=self.mode_iso_merge)
     df = df.sort_values(by="date")
     for field in self.do_cumsum_fields:
         df[field] = df.groupby("region")[field].cumsum().values
     #  TODO: Insert here population info (need path to population.csv as class attribute)
     return df