def death(canton):
    """Export the latest cumulative death count per age class for one canton.

    Filters the module-level ``df_import`` frame to the rows whose
    ``geoRegion`` equals ``canton``. For every distinct value of
    ``altersklasse_covid19`` the last matching row is taken and reduced to
    the age class and its ``sumTotal``. The result is written twice: to a
    dated backup file and to the live data file.

    Args:
        canton: Value matched against the ``geoRegion`` column.

    Raises:
        ValueError: If ``df_import`` contains no rows for ``canton``.
    """
    #get canton
    df = df_import[df_import["geoRegion"] == canton]

    #make list of all age categories (order of first appearance)
    age_list = df["altersklasse_covid19"].unique()
    if len(age_list) == 0:
        # fail with a readable message instead of a column-length mismatch
        raise ValueError("no rows found for canton {!r}".format(canton))

    #get last row of each age category; concatenate once instead of growing
    #a frame inside the loop
    latest_rows = [
        df[df["altersklasse_covid19"] == age].tail(1)[[
            "altersklasse_covid19", "sumTotal"
        ]] for age in age_list
    ]
    df_final = pd.concat(latest_rows)
    df_final.columns = ["Altersklasse", "Todesfälle"]

    #export backup to csv
    df_final.to_csv("/root/covid_aargau/backups/death/death_{}_{}.csv".format(
        canton, backdate(0)),
                    index=False)

    #export to csv
    df_final.to_csv(
        "/root/covid_aargau/data/death/death_{}.csv".format(canton),
        index=False)
def covid_basel(canton):
    """Build and export the daily key-figure table for one canton.

    Downloads the openZH totals CSV for ``canton``, derives day-over-day
    differences, and writes a two-column table (latest day with case data
    vs. the same weekday one week earlier) to a dated backup file and to
    the live data file.

    Args:
        canton: Canton code inserted into the download URL and file names.
    """
    source = "https://raw.githubusercontent.com/openZH/covid_19/master/fallzahlen_kanton_total_csv_v2/COVID19_Fallzahlen_Kanton_{}_total.csv".format(
        canton)
    raw = pd.read_csv(source)

    figures = raw[[
        "date", "ncumul_conf", "current_hosp", "current_icu",
        "ncumul_deceased"
    ]].copy()
    figures["date"] = pd.to_datetime(figures["date"])

    # daily change of each tracked column
    derived = (
        ("cases", "ncumul_conf"),
        ("new_hosp", "current_hosp"),
        ("new_icu", "current_icu"),
        ("new_dec", "ncumul_deceased"),
    )
    for target, origin in derived:
        figures[target] = figures[origin].diff()

    # most recent row that has a case difference, plus the row dated
    # exactly seven days before it
    latest = figures[figures["cases"].notnull()].tail(1)
    week_ago_date = latest.iloc[-1]["date"] - timedelta(days=7)
    week_ago = figures[figures["date"] == week_ago_date]

    labels = {
        "date": "Datum",
        "ncumul_conf": "Fälle gesamt",
        "current_hosp": "Infizierte im Spital",
        "current_icu": "davon auf der Intensiv-Station",
        "ncumul_deceased": "Todesfälle gesamt",
        "cases": "Neue Fälle",
        "new_hosp": "Veränderung Spital-Belegung",
        "new_icu": "Veränderung Intensiv-Belegung",
        "new_dec": "Todesfälle neu",
    }
    pair = pd.concat([latest, week_ago]).rename(columns=labels)

    # column header for the current values: "Zahlen vom dd.mm.yyyy"
    headline = "Zahlen vom " + pair.iloc[0]["Datum"].date().strftime(
        "%d.%m.%Y")

    # every value column is cast to int before the displayed rows are picked
    numbers = pair.set_index("Datum").astype(int)
    df_final = numbers[[
        "Neue Fälle", "Fälle gesamt", "Todesfälle neu", "Todesfälle gesamt",
        "Infizierte im Spital", "davon auf der Intensiv-Station"
    ]].T
    df_final.columns = [headline, "vor einer Woche"]

    # dated backup first, then the live file
    df_final.to_csv(
        "/root/covid_aargau/backups/daily_data_bz/backup_{}_{}.csv".format(
            backdate(0), canton))
    df_final.to_csv(
        "/root/covid_aargau/data/only_AG/daily_data_{}.csv".format(canton))
def test_pos(canton):
    """Export weekly test counts and positivity rate for one region.

    Filters the module-level ``df_import`` frame to ``canton``, keeps rows
    dated 2020-08-31 or later, and sums ``entries_pos`` / ``entries_neg``
    per weekly bin (pandas ``"W"`` frequency). Bins whose label is not
    before the current time are dropped, and the index is moved back six
    days.

    For ``canton == "CH"`` nothing is written to disk: the positivity
    column is prepended to the module-level ``dfch`` frame, which must
    already exist. For any other region the weekly counts are exported,
    and the region's positivity rate is merged with ``dfch`` on the index
    and exported next to the national rate.

    Args:
        canton: Value matched against the ``geoRegion`` column; ``"CH"``
            is treated as the national aggregate.
    """
    global dfch

    dfc = df_import[df_import["geoRegion"] == canton].copy()
    dfc["datum"] = pd.to_datetime(dfc["datum"])
    recent = dfc[dfc["datum"] >= pd.to_datetime("2020-08-31")].set_index(
        "datum")

    # select the columns with a list: tuple-style selection on a resampler
    # is deprecated and raises in current pandas
    weekly = recent.resample("W")[["entries_pos", "entries_neg"]].sum()
    weekly = weekly[weekly.index < datetime.today()].copy()
    weekly.index = weekly.index - timedelta(days=6)
    weekly.columns = ["positiv", "negativ"]

    rate_col = "Positivitätsrate_{}".format(canton)
    weekly[rate_col] = (
        (weekly["positiv"] /
         (weekly["positiv"] + weekly["negativ"])) * 100).round(1)

    if canton == "CH":
        # keep the national rate for the later per-canton comparison
        dfch = pd.concat([weekly[[rate_col]], dfch])
        return

    counts = weekly[["positiv", "negativ"]].T
    #make a backup export of the current data
    counts.to_csv(
        "/root/covid_aargau/backups/tests/tests_weekly_{}_{}.csv".format(
            canton, backdate(0)))
    #export to csv
    counts.to_csv(
        "/root/covid_aargau/data/tests_weekly/tests_weekly_{}.csv".format(
            canton))

    dfc_final = weekly[[rate_col]].merge(dfch,
                                         left_index=True,
                                         right_index=True)
    dfc_final.columns = [canton, "Schweiz"]

    #make a backup export of the current data
    dfc_final.to_csv(
        "/root/covid_aargau/backups/positivity/positivity_weekly_{}_{}.csv"
        .format(canton, backdate(0)))
    #export to csv
    dfc_final.to_csv(
        "/root/covid_aargau/data/positivity_weekly/positivity_weekly_{}.csv"
        .format(canton))
def age_dist_cantons(canton):
    """Export column totals by sex for one canton.

    Downloads ``men.csv``, ``women.csv`` and ``all.csv`` for ``canton``
    (pausing five seconds between requests), sums every column except
    ``date`` in each file, and merges the three totals on the column name
    (exported as ``Altersklasse``). The last row of the merged table is
    dropped before it is written to a dated backup file and to the live
    data file.

    Args:
        canton: Canton directory name in the source repository; also used
            in the output file names.
    """
    base_url = "https://raw.githubusercontent.com/timoll/bag_scrape/master/out/canton_age/"
    sources = (("men.csv", "Männer"), ("women.csv", "Frauen"),
               ("all.csv", "Total"))

    # download all three files first, waiting between consecutive requests
    downloads = []
    for position, (file_name, _) in enumerate(sources):
        if position:
            sleep(5)
        downloads.append(pd.read_csv(base_url + canton + "/" + file_name))

    # one two-column frame (Altersklasse, <label>) per source file
    totals = []
    for frame, (_, label) in zip(downloads, sources):
        column_sums = frame.loc[:, frame.columns != "date"].sum()
        totals.append(
            pd.DataFrame(column_sums).reset_index().rename(columns={
                "index": "Altersklasse",
                0: label
            }))

    men, women, everyone = totals
    df_all = men.merge(women, on="Altersklasse").merge(everyone,
                                                       on="Altersklasse")
    # the final row is not exported
    df_all = df_all.drop(df_all.tail(1).index)

    # dated backup first, then the live file
    df_all.to_csv(
        "/root/covid_aargau/backups/age/altersverteilung_{}_{}.csv".format(
            canton, backdate(0)),
        index=False)
    df_all.to_csv(
        "/root/covid_aargau/data/altersverteilung_{}.csv".format(canton),
        index=False)
def antigen(canton):
    """Export weekly 7-day sums per detection method for one canton.

    Filters the module-level ``df_import`` frame to ``canton``, keeps the
    Sunday rows after 2020-11-01, relabels the detection methods, and
    pivots the ``sum7d`` values into one column per week. Each week is
    labelled with the date six days before the Sunday, formatted
    ``dd.mm.yyyy``. The table is written to a dated backup file and to the
    live data file.

    Args:
        canton: Value matched against the ``geoRegion`` column.
    """
    per_canton = df_import[df_import["geoRegion"] == canton].copy()

    # parsed date becomes the index
    per_canton["datum_neu"] = pd.to_datetime(per_canton["datum"],
                                             format="%Y-%m-%d")
    per_canton = per_canton.set_index("datum_neu")
    per_canton["weekday"] = per_canton.index.weekday

    slim = per_canton[[
        "weekday", "entries", "sumTotal", "sum7d", "nachweismethode"
    ]].copy()

    # Sundays (weekday 6) later than the start of November 2020
    is_sunday = slim["weekday"] == 6
    after_start = slim.index > pd.to_datetime("2020-11")
    weekly = slim[is_sunday & after_start][["nachweismethode",
                                            "sum7d"]].copy()

    # display names for the detection methods
    relabel = (
        ("Antigen_Schnelltest", "Antigen-Schnelltest"),
        ("PCR", "PCR (herkömmlich)"),
    )
    for old, new in relabel:
        weekly["nachweismethode"] = weekly["nachweismethode"].str.replace(
            old, new)

    # shift by six days so the label shows the Monday, then format as text
    weekly.index = (weekly.index - timedelta(days=6)).strftime("%d.%m.%Y")

    # remember chronological column order; the pivot sorts the text labels
    chronological = list(dict.fromkeys(weekly.index))
    pivoted = weekly.pivot_table(index="nachweismethode",
                                 columns=weekly.index,
                                 values="sum7d")
    ordered = pivoted[chronological]

    # dated backup first, then the live file
    ordered.to_csv(
        "/root/covid_aargau/backups/schnelltests/schnelltests_{}_{}.csv".
        format(canton, backdate(0)))
    ordered.to_csv(
        "/root/covid_aargau/data/schnelltests/schnelltests_{}.csv".format(
            canton))
def daily_cases(canton):
    """Export the daily case series of one canton.

    Takes ``datum``, ``entries`` and ``mean7d`` for ``canton`` from the
    module-level ``df_import`` frame, renames them to German display
    headers, and adds a constant ``baseline`` column of zeros. The table is
    written without its index to a dated backup file and to the live data
    file.

    Args:
        canton: Value matched against the ``geoRegion`` column.
    """
    in_canton = df_import["geoRegion"] == canton
    headers = {
        "datum": "Datum",
        "entries": "Fälle",
        "mean7d": "7-Tages-Durchschnitt",
    }
    series = df_import.loc[in_canton, list(headers)].rename(columns=headers)

    # zero line used by the Datawrapper chart
    series = series.assign(baseline=0)

    # dated backup first, then the live file
    backup_path = "/root/covid_aargau/backups/daily_cases/daily_cases_{}_{}.csv".format(
        canton, backdate(0))
    series.to_csv(backup_path, index=False)

    live_path = "/root/covid_aargau/data/daily_cases/daily_cases_{}.csv".format(
        canton)
    series.to_csv(live_path, index=False)
def death_getter(canton):
    """Export fatalities per age group for a canton or canton group.

    Downloads the fatalities CSV, selects the rows of ``canton`` (or of the
    member cantons when ``canton`` is ``"OCH"`` or ``"ZCH"``), and sums
    ``fatalities`` per ``age_group``. The table is written to a dated
    backup file and to the live data file.

    Args:
        canton: Canton code, or ``"OCH"`` / ``"ZCH"`` for the two
            hard-coded canton groups.
    """
    cases = pd.read_csv(
        "https://raw.githubusercontent.com/daenuprobst/covid19-cases-switzerland/master/covid19_cases_fatalities_switzerland_bag.csv"
    )

    # parsed date replaces the raw date column and becomes an unnamed index
    cases["date_new"] = pd.to_datetime(cases["date"], format="%Y-%m-%d")
    cases = cases.drop(columns="date").set_index("date_new").rename_axis(None)

    # canton groups that are reported as one unit
    groups = {
        "OCH": ["SG", "TG", "AR", "AI"],
        "ZCH": ["LU", "SZ", "UR", "OW", "NW", "ZG"],
    }
    if canton in groups:
        wanted = cases["canton"].isin(groups[canton])
    else:
        wanted = cases["canton"] == canton
    selected = cases[wanted].drop(["canton", "cases"], axis=1)

    # total fatalities per age group
    per_age = selected.groupby("age_group")["fatalities"].sum()
    df_final = per_age.to_frame(name="Todesfälle").rename_axis("Altersklasse")

    # dated backup first, then the live file
    df_final.to_csv(
        "/root/covid_aargau/backups/death/todesfaelle_{}_{}.csv".format(
            canton, backdate(0)))
    df_final.to_csv("/root/covid_aargau/data/deaths_{}.csv".format(canton))
# In[13]:

# Load an earlier daily backup (dfe = "early") used to derive late
# registrations (Nachmeldungen) of cases and deaths.
url_yest = "https://raw.githubusercontent.com/makwal/covid_aargau/master/backups/daily_data/backup_{}.csv"

# the data counts as updated when row 1 of df_final carries yesterday's date
date_in_df = df_final.iloc[1]["date"]
date_in_df_prev = df_final.iloc[0]["date"]
data_updated = date_in_df == date.today() - timedelta(days=1)

# How far back the comparison backup lies:
#   updated data:     3 days on Mondays, 1 day otherwise
#   not yet updated:  one additional day (4 on Mondays, 2 otherwise)
is_monday = date.today().weekday() == 0
days_back = (3 if is_monday else 1) + (0 if data_updated else 1)
dfe = pd.read_csv(url_yest.format(backdate(days_back)))

#"Zahlen vom " day before yesterday
df_travel.columns = df_travel.iloc[1] df_travel = df_travel.drop([0, 1]) #remove rows with NaN in col header df_travel = df_travel.loc[:, df_travel.columns.notnull()] #formatting df_travel["date"] = pd.to_datetime(df_travel["date"], errors="coerce").dt.normalize() df_travel.columns = ["date", "Fälle neu", "aktuell betreut", "Fälle total"] # In[4]: #if Monday (weekday == 0), take Friday as latest values if date.today().weekday() == 0: df_travel = df_travel[df_travel["date"] < backdate(2)] else: df_travel = df_travel[df_travel["date"] < backdate(0)] # In[5]: #fill NaN values with previous value df_travel = df_travel.fillna(method='ffill') #choose relevant columns df_travel = df_travel[["date", "aktuell betreut"]] # In[6]: #replace "n.d." no data with NaN df_travel.loc[df_travel["aktuell betreut"] == "n.d.",
# In[4]:

# Parse the date column; values that cannot be parsed become NaT
# (errors="coerce").
df_iso["date"] = pd.to_datetime(df_iso["date"], errors="coerce")
df_iso.columns = [
    "date", "new_isolated", "total_curr_isolated", "total_isolated",
    "new_quar", "total_curr_quar", "total_quar", "NaN"
]

# get rid of last row
df_iso.drop(df_iso.tail(1).index, inplace=True)

# Keep only rows dated before the cut-off. On Mondays (weekday == 0) the
# cut-off is backdate(2) so that, per the original author's note, Friday is
# the latest value; on other days it is backdate(0).
if date.today().weekday() == 0:
    df_iso = df_iso[df_iso["date"] < backdate(2)]
else:
    df_iso = df_iso[df_iso["date"] < backdate(0)]

# In[5]:

# Fill NaN values with the previous value. ffill() is the supported spelling
# of the deprecated fillna(method='ffill') and produces the same result.
df_iso = df_iso.ffill()

# get relevant rows and columns
df_iso_time = df_iso[["date", "total_curr_isolated",
                      "total_curr_quar"]].copy()

# strip the time-of-day component from the dates
df_iso_time["date"] = df_iso_time["date"].dt.normalize()
by="total_vaccinations_per_hundred", ascending=False) #prepare german country names and iso_codes dfc = df_import_countries[["name", "alpha2", "alpha3"]].copy() dfc["alpha3"] = dfc["alpha3"].str.upper() #merge primary dataset with german country names df3 = df2.merge(dfc, left_on="iso_code", right_on="alpha3") #formatting for datawrapper visualization purposes df3["date"] = pd.to_datetime(df3["date"]).dt.strftime("%d.%m.%Y") df3["name"] = df3["name"].astype(str) + " ^" + df3["date"] + "^" df3["alpha2"] = ":" + df3["alpha2"] + ": " + df3["name"] #create df_final df_final = df3[["alpha2", "total_vaccinations_per_hundred", "vaccines"]].copy() df_final.columns = [ "Land ^aktualisiert^", "Impfungen pro 100 Menschen", "Impfstoffe" ] # In[ ]: #make a backup export of the current data df_final.to_csv("/root/covid_aargau/backups/vacc_world/backup_{}.csv".format( backdate(0)), index=False) #export to csv df_final.to_csv("/root/covid_aargau/data/vaccination/vacc_world.csv", index=False)
def csv_generator(canton):
    """Export weekly test counts and positivity rates for one canton.

    Downloads the per-canton weekly file and the national weekly file,
    derives positive/negative test counts from the total and the positivity
    percentage, and converts the calendar week into the date of that week's
    first day. Two tables are written, each to a dated backup file and to a
    live data file: the test counts (transposed) and the positivity rate of
    the canton next to the national one.

    NOTE(review): the calendar week is always interpreted as a week of
    2020 because the year prefix is hard-coded; confirm this is still
    intended for data from later years.

    Args:
        canton: Canton code inserted into the download URL, the column
            header and the output file names.
    """
    #read csv from Github
    source_url = "https://raw.githubusercontent.com/maekke/bag_data/master/daten_pro_kanton/bag_data_{}.csv".format(canton)
    source_url_all = "https://raw.githubusercontent.com/maekke/bag_data/master/bag_weekly_data.csv"
    canton_weekly = pd.read_csv(source_url)
    sleep(2)
    swiss_weekly = pd.read_csv(source_url_all)

    # positives = total * rate / 100, rounded to whole tests
    total_tests = canton_weekly["total_number_of_tests"]
    positives = (total_tests / 100 *
                 canton_weekly["positivity_rate_percent"]).round(0).astype(int)
    canton_weekly["positive_tests"] = positives
    canton_weekly["negative_tests"] = total_tests - positives

    def first_day_of_week(week):
        # ISO year/week/weekday parsing; "-1" selects the first weekday
        return datetime.strptime("2020-W" + str(week) + "-1", "%G-W%V-%u")

    # week number -> first day of that week, formatted dd.mm.yyyy
    for frame in (canton_weekly, swiss_weekly):
        frame["week_date"] = frame["week"].apply(
            first_day_of_week).dt.strftime("%d.%m.%Y")

    # headers are assigned by position
    canton_weekly.columns = [
        "Kalenderwoche", "Tests total", "Positivitätsrate", "source_file",
        "positiv", "negativ", "Datum"
    ]

    tests_table = canton_weekly[["Datum", "positiv", "negativ"]].T

    # national rate is attached by row index
    rate_table = canton_weekly[["Kalenderwoche", "Datum",
                                "Positivitätsrate"]].join(
                                    swiss_weekly["positivity_rate_percent"])
    rate_table.columns = ["Kalenderwoche", "Datum", canton, "Schweiz"]

    # dated backups
    stamp = backdate(0)
    tests_table.to_csv(
        "/root/covid_aargau/backups/tests/tests_weekly_{}_{}.csv".format(
            canton, stamp))
    rate_table.to_csv(
        "/root/covid_aargau/backups/positivity/positivity_weekly_{}_{}.csv".
        format(canton, backdate(0)))

    # live files
    tests_table.to_csv(
        "/root/covid_aargau/data/tests_weekly_{}.csv".format(canton),
        header=None)
    rate_table.to_csv(
        "/root/covid_aargau/data/positivity_weekly_{}.csv".format(canton),
        index=False)