def helper_read_from_cache_or_fetch_from_url(url: str, file_cache: str, readFromCache: bool = True):
    """
    Return the JSON content for `url`, served from `file_cache` when allowed.

    readFromCache=True  -> use the cache file, but only if
        helper.check_cache_file_available_and_recent (max_age=1800) accepts
        it; otherwise fall back to calling the API and rewriting the cache
    readFromCache=False -> always call the API and write the result to the
        cache file

    Returns the data loaded from the cache file, or the value returned by
    fetch_json_as_dict_from_url_and_reduce_to_list(url).
    """
    if readFromCache:
        # a requested cache read is only honoured when the cache file passes
        # the helper's availability/recency check
        readFromCache = helper.check_cache_file_available_and_recent(
            fname=file_cache, max_age=1800, verbose=False)

    if readFromCache:
        # read from cache
        with open(file_cache, mode='r', encoding='utf-8') as json_file:
            return json.load(json_file)

    # fetch and write to cache
    # Plain truthiness instead of `== True` / `== False`: a falsy non-bool
    # result from the helper (e.g. None) now triggers a fetch instead of
    # silently returning an empty list.
    json_cont = fetch_json_as_dict_from_url_and_reduce_to_list(url)
    with open(file_cache, mode='w', encoding='utf-8', newline='\n') as fh:
        json.dump(json_cont, fh, ensure_ascii=False)
    return json_cont
# Built-in/Generic Imports
import urllib.request
import pandas as pd
import matplotlib.pyplot as plt
import locale

# my helper modules
import helper

# DE date format: Okt instead of Oct
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

# Forward slash on purpose: the former 'cache\de-vaccination.csv' contained the
# invalid escape sequence '\d' and only pointed into the cache directory on
# Windows.
dataFileSource = 'cache/de-vaccination.csv'

# re-download only when the cached copy fails the helper's check
if not helper.check_cache_file_available_and_recent(
        fname=dataFileSource, max_age=1800, verbose=False):
    url = "https://raw.githubusercontent.com/robert-koch-institut/COVID-19-Impfungen_in_Deutschland/master/Aktuell_Deutschland_Bundeslaender_COVID-19-Impfungen.csv"
    # the context manager closes the HTTP response once the payload is read
    with urllib.request.urlopen(url) as filedata:
        datatowrite = filedata.read()
    with open(dataFileSource, mode='wb') as f:
        f.write(datatowrite)

df = pd.read_csv(dataFileSource, sep=",")

# use date as index
df['Impfdatum'] = pd.to_datetime(df['Impfdatum'], format='%Y-%m-%d')
df.set_index(['Impfdatum'], inplace=True)

# total over the whole 'Anzahl' column
sum_doses = df['Anzahl'].sum()

# use date as index
# drop the 2 Feb data rows (of 0 deaths)
df2.drop([0, 1], inplace=True)

df_covid_2020 = pd.DataFrame()
# pd.concat replaces Series.append, which was removed in pandas 2.0;
# ignore_index=True renumbers the combined rows from 0 as before
df_covid_2020['Deaths_Covid_2020'] = pd.concat(
    [df1['Deaths_Covid_2020'], df2['Deaths_Covid_2020']], ignore_index=True)
# rolling mean over 7 rows (fewer at the start), rounded to one decimal
df_covid_2020['Deaths_Covid_2020_roll'] = df_covid_2020[
    'Deaths_Covid_2020'].rolling(window=7, min_periods=1).mean().round(1)
# print(df_covid_2020.tail())
del df1, df2

# 2. fetch and parse Excel of mortality data from Destatis
# Forward slash on purpose: the former 'cache\de-mortality.xlsx' contained the
# invalid escape sequence '\d' and only pointed into the cache directory on
# Windows.
excelFile = 'cache/de-mortality.xlsx'

# re-download only when the cached copy fails the helper's check
if not helper.check_cache_file_available_and_recent(
        fname=excelFile, max_age=1800, verbose=False):
    url = "https://www.destatis.de/DE/Themen/Gesellschaft-Umwelt/Bevoelkerung/Sterbefaelle-Lebenserwartung/Tabellen/sonderauswertung-sterbefaelle.xlsx?__blob=publicationFile"
    # the context manager closes the HTTP response once the payload is read
    with urllib.request.urlopen(url) as filedata:
        datatowrite = filedata.read()
    with open(excelFile, mode='wb') as f:
        f.write(datatowrite)

# data_only : read values instead of formulas
workbookIn = openpyxl.load_workbook(excelFile, data_only=True)
sheetIn = workbookIn['D_2016_2020_Tage']

# accumulators, one list per series (filled by code outside this chunk)
l_dates = []
l_deaths2020 = []
l_deaths2019 = []
l_deaths2018 = []
l_deaths2017 = []
# NOTE(review): the first five statements are the tail of a function whose
# header lies outside this chunk (presumably read_ref_data_countries, given the
# call below - confirm); their indentation is reconstructed.
        # store the parsed fields in d2 and file the record under `name`
        d2["Code"] = code
        d2['Continent'] = continent
        d2["Population"] = pop
        d_countries_ref[name] = d2
    return d_countries_ref


# --- script driver: the calls below run in this order at import time ---

d_countries_ref = read_ref_data_countries()
# d_ref_country_database = helper.read_json_file(
#     'data/ref_country_database.json')
d_selected_countries = read_ref_selected_countries()

# download new data only when the cache file fails the helper's check
if not helper.check_cache_file_available_and_recent(
        fname=file_cache, max_age=1800, verbose=True):
    download_new_data()

d_countries_timeseries = read_json_data()

check_for_further_interesting_countries()

# TODO: deprecated
# fit_doubling_time()

extract_latest_date_data()

# deprecated: extract_latest_date_data_selected()

export_time_series_all_countries()
df['-25%'] += gt_m025p * 1 df['-50%'] += gt_m050p * 1 df['-75%'] += gt_m075p * 1 df['-100%'] += gt_m100p * 1 count += 1 # if count >= 10: # break df.to_csv('cache/hist-de-districts.csv') return df # TODO # os.remove("cache/hist-de-districts.csv") if helper.check_cache_file_available_and_recent( fname='cache/hist-de-districts.csv', max_age=3600, verbose=True) == False: df = read_and_prepare_data() else: df = pd.read_csv('cache/hist-de-districts.csv') df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d') df.set_index(['Date'], inplace=True) def CntToPerc(x): return x / 412 * 100 def PercToCnt(x): return x / 100 * 412