コード例 #1
0
def helper_read_from_cache_or_fetch_from_url(url: str, file_cache: str, readFromCache: bool = True):
    """
    Return the JSON content for `url`, using `file_cache` as a local cache.

    readFromCache=True -> return the cached data, but only if
        helper.check_cache_file_available_and_recent() accepts the cache file
        (max_age=1800); otherwise fall back to fetching from the API
    readFromCache=False -> calling the API, and writing cache to filesystem

    Returns whatever json.load() reads from the cache file, or the result of
    fetch_json_as_dict_from_url_and_reduce_to_list(url) after a fetch.
    """
    # Only consult the helper when the caller allows cache usage at all.
    use_cache = bool(readFromCache) and helper.check_cache_file_available_and_recent(
        fname=file_cache, max_age=1800, verbose=False)

    # Plain truthiness instead of `== True` / `== False`: a helper result that
    # is neither True nor False (e.g. None) must trigger a fetch, not a silent
    # empty-list return.
    if use_cache:  # read from cache
        with open(file_cache, mode='r', encoding='utf-8') as json_file:
            return json.load(json_file)

    # fetch and write to cache
    json_cont = fetch_json_as_dict_from_url_and_reduce_to_list(url)
    with open(file_cache, mode='w', encoding='utf-8', newline='\n') as fh:
        json.dump(json_cont, fh, ensure_ascii=False)
    return json_cont
# Built-in/Generic Imports
import urllib.request

import pandas as pd
import matplotlib.pyplot as plt
import locale

# my helper modules
import helper

# DE date format: Okt instead of Oct
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

# Forward slash on purpose: in 'cache\de-...' the '\d' is an invalid escape
# sequence (a warning in current Python versions) and the backslash is only a
# path separator on Windows. 'cache/...' works on every platform.
dataFileSource = 'cache/de-vaccination.csv'

# Re-download only when the helper does not accept the cached file
# (max_age=1800, presumably seconds -- TODO confirm against helper).
if not helper.check_cache_file_available_and_recent(
        fname=dataFileSource, max_age=1800, verbose=False):
    url = "https://raw.githubusercontent.com/robert-koch-institut/COVID-19-Impfungen_in_Deutschland/master/Aktuell_Deutschland_Bundeslaender_COVID-19-Impfungen.csv"
    # Context manager so the HTTP response is closed even if read() fails.
    with urllib.request.urlopen(url) as filedata:
        datatowrite = filedata.read()
    with open(dataFileSource, mode='wb') as f:
        f.write(datatowrite)

df = pd.read_csv(dataFileSource, sep=",")

# use date as index
df['Impfdatum'] = pd.to_datetime(df['Impfdatum'], format='%Y-%m-%d')
df.set_index(['Impfdatum'], inplace=True)

# Total of the 'Anzahl' column over all rows.
sum_doses = df['Anzahl'].sum()

# use date as index
# drop the 2 Feb data rows (of 0 deaths)
df2.drop([0, 1], inplace=True)

# Stack the 'Deaths_Covid_2020' columns of df1 and df2 into one continuous
# series with a fresh 0..n-1 index.
# Series.append() was removed in pandas 2.0; pd.concat() is the supported
# equivalent and also works on older pandas versions.
df_covid_2020 = pd.DataFrame()
df_covid_2020['Deaths_Covid_2020'] = pd.concat(
    [df1['Deaths_Covid_2020'], df2['Deaths_Covid_2020']], ignore_index=True)
# 7-row rolling mean; min_periods=1 yields values from the first row on.
df_covid_2020['Deaths_Covid_2020_roll'] = df_covid_2020[
    'Deaths_Covid_2020'].rolling(window=7, min_periods=1).mean().round(1)
# print(df_covid_2020.tail())
# The source frames are no longer needed once merged.
del df1, df2

# 2. fetch and parse Excel of mortality data from Destatis

# Forward slash on purpose: in 'cache\de-...' the '\d' is an invalid escape
# sequence (a warning in current Python versions) and the backslash is only a
# path separator on Windows. 'cache/...' works on every platform.
excelFile = 'cache/de-mortality.xlsx'

# Re-download only when the helper does not accept the cached file
# (max_age=1800, presumably seconds -- TODO confirm against helper).
if not helper.check_cache_file_available_and_recent(
        fname=excelFile, max_age=1800, verbose=False):
    url = "https://www.destatis.de/DE/Themen/Gesellschaft-Umwelt/Bevoelkerung/Sterbefaelle-Lebenserwartung/Tabellen/sonderauswertung-sterbefaelle.xlsx?__blob=publicationFile"
    # Context manager so the HTTP response is closed even if read() fails.
    with urllib.request.urlopen(url) as filedata:
        datatowrite = filedata.read()
    with open(excelFile, mode='wb') as f:
        f.write(datatowrite)

# data_only : read values instead of formulas
workbookIn = openpyxl.load_workbook(excelFile, data_only=True)
sheetIn = workbookIn['D_2016_2020_Tage']

# Accumulators, one list per column to be extracted from the sheet.
l_dates = []
l_deaths2020 = []
l_deaths2019 = []
l_deaths2018 = []
l_deaths2017 = []
        d2["Code"] = code
        d2['Continent'] = continent
        d2["Population"] = pop
        d_countries_ref[name] = d2

    return d_countries_ref


# Script entry sequence. The steps below run in order at module level; the
# called functions are defined elsewhere in this file and presumably share
# state through the module-level names assigned here -- TODO confirm before
# reordering anything.

# Country reference data.
d_countries_ref = read_ref_data_countries()

# d_ref_country_database = helper.read_json_file(
#     'data/ref_country_database.json')

# Reference data for the selected countries.
d_selected_countries = read_ref_selected_countries()

# Download new data only when the helper does not accept the cache file
# (max_age=1800, presumably seconds -- TODO confirm against helper).
if not helper.check_cache_file_available_and_recent(
        fname=file_cache, max_age=1800, verbose=True):
    download_new_data()

# Time series data read from JSON (presumably from file_cache -- verify in
# read_json_data()).
d_countries_timeseries = read_json_data()

check_for_further_interesting_countries()

# TODO: deprecated
# fit_doubling_time()

extract_latest_date_data()

# deprecated: extract_latest_date_data_selected()

export_time_series_all_countries()
コード例 #5
0
        df['-25%'] += gt_m025p * 1
        df['-50%'] += gt_m050p * 1
        df['-75%'] += gt_m075p * 1
        df['-100%'] += gt_m100p * 1
        count += 1
        # if count >= 10:
        #     break
    df.to_csv('cache/hist-de-districts.csv')
    return df


# TODO
# os.remove("cache/hist-de-districts.csv")

# Rebuild the data when the helper does not accept the cached CSV
# (max_age=3600, presumably seconds -- TODO confirm against helper); otherwise
# load the cached CSV and restore the 'Date' index.
# `not ...` instead of `== False`: any falsy helper result (e.g. None) must
# trigger a rebuild rather than an attempt to read a cache that was rejected.
if not helper.check_cache_file_available_and_recent(
        fname='cache/hist-de-districts.csv', max_age=3600,
        verbose=True):
    df = read_and_prepare_data()
else:
    df = pd.read_csv('cache/hist-de-districts.csv')
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
    df.set_index(['Date'], inplace=True)


def CntToPerc(x, total=412):
    """Convert a count `x` into a percentage of `total`.

    `total` defaults to 412, the value that was previously hard-coded
    (presumably the number of German districts -- TODO confirm). Callers that
    pass only `x` get exactly the previous result.

    Inverse of PercToCnt() for the same `total`.
    """
    return x / total * 100


def PercToCnt(x, total=412):
    """Convert a percentage `x` of `total` back into a count.

    `total` defaults to 412, the value that was previously hard-coded
    (presumably the number of German districts -- TODO confirm). Callers that
    pass only `x` get exactly the previous result.

    Inverse of CntToPerc() for the same `total`.
    """
    return x / 100 * total