Exemple #1
0
def death(canton):
    """Export the last "sumTotal" value of every age class for one canton.

    Reads the module-level ``df_import`` frame, keeps the rows whose
    "geoRegion" equals ``canton`` and, for each distinct value of
    "altersklasse_covid19" (in order of first appearance), takes the last
    row. The result is written twice: as a dated backup and as the current
    data file.
    """
    #rows of the requested canton
    canton_rows = df_import[df_import["geoRegion"] == canton]
    wanted = ["altersklasse_covid19", "sumTotal"]

    #last row of each age category; the empty seed frame keeps the result a
    #(column-less) DataFrame when the canton has no rows at all
    pieces = [pd.DataFrame([])]
    pieces.extend(
        canton_rows[canton_rows["altersklasse_covid19"] == age].tail(1)[wanted]
        for age in canton_rows["altersklasse_covid19"].unique())
    df_final = pd.concat(pieces)

    df_final.columns = ["Altersklasse", "Todesfälle"]

    #export backup to csv
    backup_path = "/root/covid_aargau/backups/death/death_{}_{}.csv".format(
        canton, backdate(0))
    df_final.to_csv(backup_path, index=False)

    #export to csv
    data_path = "/root/covid_aargau/data/death/death_{}.csv".format(canton)
    df_final.to_csv(data_path, index=False)
Exemple #2
0
def covid_basel(canton):
    """Export the latest daily figures of a canton next to those of a week ago.

    Downloads the openZH totals CSV for ``canton``, derives day-to-day
    differences, picks the last row that has a case difference plus the row
    dated exactly seven days earlier, and writes the transposed comparison
    table to a dated backup file and to the current data file.
    """
    source = "https://raw.githubusercontent.com/openZH/covid_19/master/fallzahlen_kanton_total_csv_v2/COVID19_Fallzahlen_Kanton_{}_total.csv".format(
        canton)
    raw = pd.read_csv(source)

    daily = raw[[
        "date", "ncumul_conf", "current_hosp", "current_icu", "ncumul_deceased"
    ]].copy()
    daily["date"] = pd.to_datetime(daily["date"])

    #calculate daily data as row-to-row differences
    for target, origin in (("cases", "ncumul_conf"),
                           ("new_hosp", "current_hosp"),
                           ("new_icu", "current_icu"),
                           ("new_dec", "ncumul_deceased")):
        daily[target] = daily[origin].diff()

    #last row with a case difference and the same day of the previous week
    latest = daily[daily["cases"].notnull()].tail(1)
    week_before = latest.iloc[-1]["date"] - timedelta(days=7)
    previous = daily[daily["date"] == week_before]

    #unite both rows, rename columns
    both = pd.concat([latest, previous]).rename(
        columns={
            "date": "Datum",
            "ncumul_conf": "Fälle gesamt",
            "current_hosp": "Infizierte im Spital",
            "current_icu": "davon auf der Intensiv-Station",
            "ncumul_deceased": "Todesfälle gesamt",
            "cases": "Neue Fälle",
            "new_hosp": "Veränderung Spital-Belegung",
            "new_icu": "Veränderung Intensiv-Belegung",
            "new_dec": "Todesfälle neu",
        })

    #header for the current column: "Zahlen vom <dd.mm.yyyy>"
    date_current_values = "Zahlen vom " + both.iloc[0]["Datum"].date(
    ).strftime("%d.%m.%Y")

    #date as index, every remaining column cast to int
    numbers = both.set_index("Datum").astype(int)

    #pick the exported rows and transpose
    df_final = numbers[[
        "Neue Fälle", "Fälle gesamt", "Todesfälle neu", "Todesfälle gesamt",
        "Infizierte im Spital", "davon auf der Intensiv-Station"
    ]].T
    df_final.columns = [date_current_values, "vor einer Woche"]

    #make a backup export of the current data
    backup_path = "/root/covid_aargau/backups/daily_data_bz/backup_{}_{}.csv".format(
        backdate(0), canton)
    df_final.to_csv(backup_path)

    #export to csv
    data_path = "/root/covid_aargau/data/only_AG/daily_data_{}.csv".format(
        canton)
    df_final.to_csv(data_path)
def test_pos(canton):
    """Export weekly test counts and the positivity rate for one canton.

    Filters the module-level ``df_import`` to ``canton``, keeps rows dated
    2020-08-31 or later, sums "entries_pos" and "entries_neg" per weekly
    resample bin and computes the positivity rate in percent (one decimal).

    For ``canton == "CH"`` nothing is exported; the rate column is
    concatenated in front of the module-level ``dfch``. For any other canton
    the weekly counts are exported, and the rate is merged (index-aligned)
    with ``dfch`` and exported as ``canton`` vs. "Schweiz" - so ``dfch`` has
    to hold exactly one column at that point.
    """
    global dfch

    dfc = df_import[df_import["geoRegion"] == canton].copy()
    dfc["datum"] = pd.to_datetime(dfc["datum"])

    #set_index returns a new frame, so the filtered slice is never mutated
    #in place
    dfc2 = dfc[dfc["datum"] >= pd.to_datetime("2020-08-31")].set_index("datum")

    #columns must be selected with a list: tuple selection
    #(["entries_pos", "entries_neg"] without the inner brackets) raises in
    #pandas >= 2.0
    dfc3 = dfc2.resample("W")[["entries_pos", "entries_neg"]].sum()

    #drop bins whose label is not before now, then shift the labels back
    #six days
    dfc4 = dfc3[dfc3.index < datetime.today()].copy()
    dfc4.index = dfc4.index - timedelta(days=6)
    dfc4.columns = ["positiv", "negativ"]

    rate_col = "Positivitätsrate_{}".format(canton)
    dfc4[rate_col] = (
        (dfc4["positiv"] / (dfc4["positiv"] + dfc4["negativ"])) * 100).round(1)

    if canton == "CH":
        #national rate: only stored for the later per-canton merges
        dfch = pd.concat([dfc4[[rate_col]], dfch])
        return

    weekly_tests = dfc4[["positiv", "negativ"]].T

    #make a backup export of the current data
    weekly_tests.to_csv(
        "/root/covid_aargau/backups/tests/tests_weekly_{}_{}.csv".format(
            canton, backdate(0)))

    #export to csv
    weekly_tests.to_csv(
        "/root/covid_aargau/data/tests_weekly/tests_weekly_{}.csv".format(
            canton))

    dfc_final = dfc4[[rate_col]].merge(dfch, left_index=True, right_index=True)
    dfc_final.columns = [canton, "Schweiz"]

    #make a backup export of the current data
    dfc_final.to_csv(
        "/root/covid_aargau/backups/positivity/positivity_weekly_{}_{}.csv"
        .format(canton, backdate(0)))

    #export to csv
    dfc_final.to_csv(
        "/root/covid_aargau/data/positivity_weekly/positivity_weekly_{}.csv"
        .format(canton))
def age_dist_cantons(canton):
    """Export per-age-class column sums for men, women and all of a canton.

    Downloads men.csv, women.csv and all.csv from the canton's folder
    (pausing five seconds between requests), sums every column except
    "date", joins the three results on the age class, drops the last row of
    the joined table and writes it to a dated backup and the current file.
    """
    base_url = "https://raw.githubusercontent.com/timoll/bag_scrape/master/out/canton_age/"
    folder = base_url + canton + "/"

    def column_sums(frame, label):
        #sum all non-date columns; one row per source column, value in `label`
        totals = frame.loc[:, frame.columns != "date"].sum()
        return pd.DataFrame(totals).reset_index().rename(columns={
            "index": "Altersklasse",
            0: label
        })

    men = pd.read_csv(folder + "men.csv")
    sleep(5)
    women = pd.read_csv(folder + "women.csv")
    sleep(5)
    everyone = pd.read_csv(folder + "all.csv")

    by_gender = column_sums(men, "Männer").merge(
        column_sums(women, "Frauen"), on="Altersklasse")
    merged = by_gender.merge(column_sums(everyone, "Total"), on="Altersklasse")

    #the last row of the merged table is not exported
    df_all = merged.iloc[:-1]

    #export backup to csv
    backup_path = "/root/covid_aargau/backups/age/altersverteilung_{}_{}.csv".format(
        canton, backdate(0))
    df_all.to_csv(backup_path, index=False)

    #export to csv
    data_path = "/root/covid_aargau/data/altersverteilung_{}.csv".format(
        canton)
    df_all.to_csv(data_path, index=False)
def antigen(canton):
    """Export the Sunday "sum7d" values per detection method for one canton.

    Takes the rows of the module-level ``df_import`` for ``canton``, keeps
    the Sundays after 2020-11-01, relabels the detection methods, shifts the
    dates back six days and pivots to one row per method and one column per
    date (formatted dd.mm.yyyy, in order of first appearance).
    """
    #all rows for one canton, indexed by the parsed date
    per_canton = df_import[df_import["geoRegion"] == canton].copy()
    per_canton["datum_neu"] = pd.to_datetime(per_canton["datum"],
                                             format="%Y-%m-%d")
    per_canton.set_index("datum_neu", inplace=True)

    #weekday column, then only the relevant columns
    per_canton["weekday"] = per_canton.index.weekday
    relevant = per_canton[[
        "weekday", "entries", "sumTotal", "sum7d", "nachweismethode"
    ]].copy()

    #7-day value of every sunday (weekday 6) from november onwards
    is_sunday = relevant["weekday"] == 6
    after_cutoff = relevant.index > pd.to_datetime("2020-11")
    sundays = relevant[is_sunday & after_cutoff].copy()
    weekly = sundays[["nachweismethode", "sum7d"]].copy()

    #display labels for the detection methods, applied in this order
    for raw_label, shown_label in (("Antigen_Schnelltest",
                                    "Antigen-Schnelltest"),
                                   ("PCR", "PCR (herkömmlich)")):
        weekly["nachweismethode"] = weekly["nachweismethode"].str.replace(
            raw_label, shown_label)

    #date minus six days, in order to display monday
    shifted = weekly.index - timedelta(days=6)
    weekly.index = shifted.strftime("%d.%m.%Y")

    #distinct dates in order of first appearance (column order after pivot)
    ordered_dates = list(dict.fromkeys(weekly.index))

    pivoted = weekly.pivot_table(index="nachweismethode",
                                 columns=weekly.index,
                                 values="sum7d")
    result = pivoted[ordered_dates]

    #export backup to csv
    result.to_csv(
        "/root/covid_aargau/backups/schnelltests/schnelltests_{}_{}.csv".
        format(canton, backdate(0)))

    #export to csv
    result.to_csv(
        "/root/covid_aargau/data/schnelltests/schnelltests_{}.csv".format(
            canton))
def daily_cases(canton):
    """Export date, daily entries and the 7-day mean for one canton.

    Selects "datum", "entries" and "mean7d" for ``canton`` from the
    module-level ``df_import``, renames them for display, appends a constant
    zero "baseline" column and writes a dated backup plus the current file.
    """
    #rows of the canton, relevant columns only
    selected = df_import[df_import["geoRegion"] == canton][[
        "datum", "entries", "mean7d"
    ]]

    #display names, plus a baseline (for visualization purposes in
    #Datawrapper)
    table = selected.rename(columns={
        "datum": "Datum",
        "entries": "Fälle",
        "mean7d": "7-Tages-Durchschnitt",
    }).assign(baseline=0)

    #export backup to csv
    backup_path = "/root/covid_aargau/backups/daily_cases/daily_cases_{}_{}.csv".format(
        canton, backdate(0))
    table.to_csv(backup_path, index=False)

    #export to csv
    data_path = "/root/covid_aargau/data/daily_cases/daily_cases_{}.csv".format(
        canton)
    table.to_csv(data_path, index=False)
def death_getter(canton):
    """Export summed fatalities per age group for a canton or canton group.

    ``canton`` is either a single canton code or one of the group codes
    "OCH" (SG, TG, AR, AI) and "ZCH" (LU, SZ, UR, OW, NW, ZG). The
    fatalities of the matching rows are summed per "age_group" and written
    to a dated backup and to the current data file.
    """
    #get data from github
    cases = pd.read_csv(
        "https://raw.githubusercontent.com/daenuprobst/covid19-cases-switzerland/master/covid19_cases_fatalities_switzerland_bag.csv"
    )

    #parsed date becomes the (unnamed) index, the raw date column is dropped
    cases["date_new"] = pd.to_datetime(cases["date"], format="%Y-%m-%d")
    cases = cases.drop("date", axis=1).set_index("date_new").rename_axis(None)

    #group codes expand to their member cantons, anything else stands alone
    groups = {
        "OCH": ["SG", "TG", "AR", "AI"],
        "ZCH": ["LU", "SZ", "UR", "OW", "NW", "ZG"],
    }
    members = groups.get(canton, [canton])
    chosen = cases[cases["canton"].isin(members)]
    trimmed = chosen.drop(["canton", "cases"], axis=1)

    #groupby age
    per_age = trimmed.groupby("age_group")["fatalities"].sum()
    df_final = per_age.rename_axis("Altersklasse").to_frame("Todesfälle")

    #export backup to csv
    backup_path = "/root/covid_aargau/backups/death/todesfaelle_{}_{}.csv".format(
        canton, backdate(0))
    df_final.to_csv(backup_path)

    #export to csv
    df_final.to_csv("/root/covid_aargau/data/deaths_{}.csv".format(canton))
Exemple #8
0
# In[13]:

#get late registrations (Nachmeldungen) of cases and deaths
url_yest = "https://raw.githubusercontent.com/makwal/covid_aargau/master/backups/daily_data/backup_{}.csv"

#the data counts as updated when the second row carries yesterday's date
date_in_df = df_final.iloc[1]["date"]
date_in_df_prev = df_final.iloc[0]["date"]
data_updated = date_in_df == date.today() - timedelta(days=1)

#days to go back for the dfe(arly) backup: 3 on a monday, 1 on any other
#day ...
days_back = 3 if date.today().weekday() == 0 else 1

#... and one day more if the data hasn't been updated today
if not data_updated:
    days_back += 1

dfe = pd.read_csv(url_yest.format(backdate(days_back)))

#"Zahlen vom " day before yesterday
#use the second row as column header, then drop the rows labelled 0 and 1
df_travel.columns = df_travel.iloc[1]
df_travel = df_travel.drop([0, 1])

#keep only the columns whose header is not NaN
df_travel = df_travel.loc[:, df_travel.columns.notnull()]

#formatting: unparseable dates become NaT, times are reset to midnight
df_travel["date"] = pd.to_datetime(df_travel["date"],
                                   errors="coerce").dt.normalize()
df_travel.columns = ["date", "Fälle neu", "aktuell betreut", "Fälle total"]

# In[4]:

#if Monday (weekday == 0), take Friday as latest values
#NOTE(review): relies on backdate(n) being comparable with the datetime
#column - confirm against its definition
if date.today().weekday() == 0:
    df_travel = df_travel[df_travel["date"] < backdate(2)]
else:
    df_travel = df_travel[df_travel["date"] < backdate(0)]

# In[5]:

#fill NaN values with previous value
#(ffill() instead of fillna(method='ffill'): the `method` keyword of fillna
#is deprecated since pandas 2.1)
df_travel = df_travel.ffill()

#choose relevant columns
df_travel = df_travel[["date", "aktuell betreut"]]

# In[6]:

#replace "n.d." no data with NaN
df_travel.loc[df_travel["aktuell betreut"] == "n.d.",
Exemple #10
0

# In[4]:


#formatting: unparseable dates become NaT; the eighth column is named "NaN"
df_iso["date"] = pd.to_datetime(df_iso["date"], errors="coerce")
df_iso.columns = ["date", "new_isolated", "total_curr_isolated", "total_isolated",
                 "new_quar", "total_curr_quar", "total_quar", "NaN"]

#get rid of last row
df_iso.drop(df_iso.tail(1).index,inplace=True)

#if Monday (weekday == 0), take Friday as latest values
#NOTE(review): relies on backdate(n) being comparable with the datetime
#column - confirm against its definition
if date.today().weekday() == 0:
    df_iso = df_iso[df_iso["date"] < backdate(2)]
else:
    df_iso = df_iso[df_iso["date"] < backdate(0)]


# In[5]:


#fill NaN values with previous value
#(ffill() instead of fillna(method='ffill'): the `method` keyword of fillna
#is deprecated since pandas 2.1)
df_iso = df_iso.ffill()

#get relevant rows and columns
df_iso_time = df_iso[["date", "total_curr_isolated", "total_curr_quar"]].copy()

#formatting: reset the time part of every date to midnight
df_iso_time["date"] = df_iso_time["date"].dt.normalize()
    by="total_vaccinations_per_hundred", ascending=False)

#german country names and iso codes; alpha3 is upper-cased for the merge
#against iso_code below
dfc = df_import_countries[["name", "alpha2", "alpha3"]].assign(
    alpha3=lambda countries: countries["alpha3"].str.upper())

#merge primary dataset with german country names
df3 = df2.merge(dfc, left_on="iso_code", right_on="alpha3")

#formatting for datawrapper visualization purposes:
#date as dd.mm.yyyy, name followed by " ^date^", alpha2 as ":xx: name ^date^"
df3 = df3.assign(date=pd.to_datetime(df3["date"]).dt.strftime("%d.%m.%Y"))
df3 = df3.assign(name=df3["name"].astype(str) + " ^" + df3["date"] + "^")
df3 = df3.assign(alpha2=":" + df3["alpha2"] + ": " + df3["name"])

#create df_final with display column names
df_final = df3[["alpha2", "total_vaccinations_per_hundred",
                "vaccines"]].rename(
                    columns={
                        "alpha2": "Land ^aktualisiert^",
                        "total_vaccinations_per_hundred":
                        "Impfungen pro 100 Menschen",
                        "vaccines": "Impfstoffe",
                    })

# In[ ]:

#make a backup export of the current data
df_final.to_csv(
    "/root/covid_aargau/backups/vacc_world/backup_{}.csv".format(backdate(0)),
    index=False)

#export to csv
df_final.to_csv("/root/covid_aargau/data/vaccination/vacc_world.csv",
                index=False)
Exemple #12
0
def csv_generator(canton):
    """Export weekly test counts and positivity rates for one canton.

    Downloads the canton's weekly file and the nationwide weekly file,
    derives positive/negative test counts from the total and the positivity
    rate, converts calendar weeks to the date of their first day, and writes
    two tables (test counts; canton vs. "Schweiz" positivity) each to a
    dated backup and to the current data file.
    """
    #read csv from Github
    canton_url = "https://raw.githubusercontent.com/maekke/bag_data/master/daten_pro_kanton/bag_data_{}.csv".format(canton)
    national_url = "https://raw.githubusercontent.com/maekke/bag_data/master/bag_weekly_data.csv"

    per_canton = pd.read_csv(canton_url)
    sleep(2)
    national = pd.read_csv(national_url)

    #positives = share of the total given by the rate; negatives = the rest
    share = per_canton["total_number_of_tests"] / 100 * per_canton["positivity_rate_percent"]
    per_canton["positive_tests"] = share.round(0).astype(int)
    per_canton["negative_tests"] = per_canton["total_number_of_tests"] - per_canton["positive_tests"]

    #convert calendar week to first day of respective week (date)
    #NOTE(review): the ISO year is hard-coded to 2020, so week numbers of any
    #other year are mapped to 2020 dates - confirm this is intended
    def first_day_of_week(week):
        return datetime.strptime("2020-W" + str(week) + "-1", "%G-W%V-%u")

    #week start as dd.mm.yyyy for both frames
    for frame in (per_canton, national):
        frame["week_date"] = frame["week"].apply(first_day_of_week)
    for frame in (per_canton, national):
        frame["week_date"] = frame["week_date"].dt.strftime("%d.%m.%Y")

    per_canton.columns = [
        "Kalenderwoche",
        "Tests total",
        "Positivitätsrate",
        "source_file",
        "positiv",
        "negativ",
        "Datum",
    ]

    #test counts: one column per week
    df_tests = per_canton[["Datum", "positiv", "negativ"]].T

    #positivity: canton rate joined by row index with the nationwide rate
    df_pos = per_canton[["Kalenderwoche", "Datum", "Positivitätsrate"]].join(
        national["positivity_rate_percent"])
    df_pos.columns = ["Kalenderwoche", "Datum", canton, "Schweiz"]

    #make a backup export of the current data
    stamp = backdate(0)
    df_tests.to_csv("/root/covid_aargau/backups/tests/tests_weekly_{}_{}.csv".format(canton, stamp))
    df_pos.to_csv("/root/covid_aargau/backups/positivity/positivity_weekly_{}_{}.csv".format(canton, backdate(0)))

    #export to csv
    df_tests.to_csv("/root/covid_aargau/data/tests_weekly_{}.csv".format(canton), header=None)
    df_pos.to_csv("/root/covid_aargau/data/positivity_weekly_{}.csv".format(canton), index=False)