def read_json_data() -> dict:
    """
    Read the downloaded, cached JSON file contents.
    Rename some countries according to the reference database.
    Call helper.prepare_time_series per country.
    Add the _Per_Million fields.
    NO LONGER exports to a JSON file; returns a dict instead.
    """
    d_json_downloaded = helper.read_json_file(file_cache)

    # rename some countries
    d_countries_to_rename = {
        'US': 'United States',
        'Korea, South': 'South Korea',
        'Taiwan*': 'Taiwan',
        'Burma': 'Myanmar',
        "Cote d'Ivoire": 'Ivory Coast',
        'West Bank and Gaza': 'Palestinian Territory',
        'Timor-Leste': 'Timor Leste',
        'Holy See': 'Vatican',
    }
    for country_name_old, country_name_new in d_countries_to_rename.items():
        d_json_downloaded[country_name_new] = d_json_downloaded[country_name_old]
        del d_json_downloaded[country_name_old]

    d_countries = {}
    for country in d_json_downloaded.keys():
        country_data = d_json_downloaded[country]
        l_time_series = []
        pop = read_population(country)
        if pop is not None:
            pop_in_million = pop / 1000000
        else:
            pop_in_million = None
        for entry in country_data:
            d = {}
            # re-format the date using helper.date_format(y, m, d)
            s = entry['date']
            l = s.split("-")
            d['Date'] = helper.date_format(int(l[0]), int(l[1]), int(l[2]))
            d['Cases'] = int(entry['confirmed'])
            d['Deaths'] = int(entry['deaths'])
            l_time_series.append(d)
        # add Days_Past, _New and the other derived fields
        l_time_series = helper.prepare_time_series(l_time_series)
        for i in range(len(l_time_series)):
            d = l_time_series[i]
            # add the _Per_Million fields
            d = helper.add_per_million(d, pop_in_million)
        d_countries[country] = l_time_series
    return d_countries
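
# The *_Per_Million computation above is delegated to helper.add_per_million.
# The following is an illustrative sketch only (an assumption, not the actual
# code in helper.py): presumably it divides each count field by the population
# in millions and stores the result under a matching *_Per_Million key.
def _add_per_million_sketch(d: dict, pop_in_million) -> dict:
    """Hypothetical stand-in for helper.add_per_million, for illustration only."""
    for field in ('Cases', 'Deaths', 'Cases_New', 'Deaths_New'):
        if field in d:
            if pop_in_million is not None:
                # e.g. 1234 cases / 83.2 million inhabitants -> 14.832 per million
                d[field + '_Per_Million'] = round(d[field] / pop_in_million, 3)
            else:
                # no population known for this country
                d[field + '_Per_Million'] = None
    return d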
def read_csv_to_dict() -> dict:
    """
    Read and convert the source CSV file, containing:
    federalstate,infections,deaths,date,newinfections,newdeaths
    Re-calculate the _New fields via helper.prepare_time_series.
    Add the _Per_Million fields via helper.add_per_million_via_lookup.
    """
    global d_ref_states

    # preparations
    d_states_data = {'BW': [], 'BY': [], 'BE': [], 'BB': [],
                     'HB': [], 'HH': [], 'HE': [], 'MV': [],
                     'NI': [], 'NW': [], 'RP': [], 'SL': [],
                     'SN': [], 'ST': [], 'SH': [], 'TH': []}
    # add German sum
    d_states_data['DE-total'] = []
    d_german_sums = {}  # Date -> {'Cases', 'Deaths'}

    # map the English state names used in the CSV to the state codes
    d_state_name_to_code = {
        'Baden-Württemberg': 'BW',
        'Bavaria': 'BY',
        'Berlin': 'BE',
        'Brandenburg': 'BB',
        'Bremen': 'HB',
        'Hamburg': 'HH',
        'Hesse': 'HE',
        'Lower Saxony': 'NI',
        'North Rhine-Westphalia': 'NW',
        'Mecklenburg-Western Pomerania': 'MV',
        'Rhineland-Palatinate': 'RP',
        'Saarland': 'SL',
        'Saxony': 'SN',
        'Saxony-Anhalt': 'ST',
        'Schleswig-Holstein': 'SH',
        'Thuringia': 'TH',
    }

    # data body
    with open(download_file, mode='r', encoding='utf-8') as f:
        csv_reader = csv.DictReader(f, delimiter=",")
        for row in csv_reader:
            d = {}
            s = row['date']
            l = s.split("-")
            d['Date'] = helper.date_format(int(l[0]), int(l[1]), int(l[2]))
            d['Cases'] = int(row["infections"])
            d['Deaths'] = int(row["deaths"])
            state_name = row["federalstate"]
            assert state_name in d_state_name_to_code, \
                f"ERROR: unknown state: {state_name}"
            d_states_data[d_state_name_to_code[state_name]].append(d)

            # add to German sum
            if d['Date'] not in d_german_sums:
                d2 = {}
                d2['Cases'] = d['Cases']
                d2['Deaths'] = d['Deaths']
            else:
                d2 = d_german_sums[d['Date']]
                d2['Cases'] += d['Cases']
                d2['Deaths'] += d['Deaths']
            d_german_sums[d['Date']] = d2
            del d2

    # German sums -> same dict
    for datum in d_german_sums.keys():
        d = d_german_sums[datum]
        d['Date'] = datum  # add date field
        d_states_data['DE-total'].append(d)
    del d_german_sums, d

    # if DE-total of today equals yesterday's, the data has not been
    # updated yet, so drop the last date from all states
    if d_states_data['DE-total'][-1]['Cases'] == d_states_data['DE-total'][-2]['Cases']:
        print("WARNING: DE cases sum is unchanged")
        for code in d_states_data:
            d_states_data[code].pop()

    print(f"DE-States Last Date: {d_states_data['DE-total'][-1]['Date']}")

    for code in d_states_data.keys():
        l_time_series = d_states_data[code]
        # add days past, _New, _Last_Week, etc.
        l_time_series = helper.prepare_time_series(l_time_series)
        for i in range(len(l_time_series)):
            d = l_time_series[i]
            # add the _Per_Million fields
            d = helper.add_per_million_via_lookup(d, d_ref_states, code)

        # # fit cases data
        # dataCases = []
        # dataDeaths = []
        # for i in range(1, len(l_time_series)):
        #     # x = day, y = cases
        #     dataCases.append(
        #         (l_time_series[i]['Days_Past'], l_time_series[i]['Cases'])
        #     )
        #     dataDeaths.append(
        #         (l_time_series[i]['Days_Past'], l_time_series[i]['Deaths'])
        #     )
        # fit_series_res = helper.series_of_fits(
        #     dataCases, fit_range=7, max_days_past=60)
        # for i in range(0, len(l_time_series)):
        #     this_Doubling_Time = ""
        #     this_days_past = l_time_series[i]['Days_Past']
        #     if this_days_past in fit_series_res:
        #         this_Doubling_Time = fit_series_res[this_days_past]
        #     l_time_series[i]['Cases_Doubling_Time'] = this_Doubling_Time
        # fit_series_res = helper.series_of_fits(
        #     dataDeaths, fit_range=7, max_days_past=60)
        # for i in range(0, len(l_time_series)):
        #     this_Doubling_Time = ""
        #     this_days_past = l_time_series[i]['Days_Past']
        #     if this_days_past in fit_series_res:
        #         this_Doubling_Time = fit_series_res[this_days_past]
        #     l_time_series[i]['Deaths_Doubling_Time'] = this_Doubling_Time

        d_states_data[code] = l_time_series

    return d_states_data
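
# Usage sketch for read_csv_to_dict (assumptions: download_file, d_ref_states
# and the helper module are initialized elsewhere in this module, as above):
if __name__ == '__main__':
    d_states = read_csv_to_dict()
    # every value is a per-state time series: Date/Cases/Deaths from the CSV,
    # the derived fields from helper.prepare_time_series, and the
    # *_Per_Million fields from helper.add_per_million_via_lookup
    latest = d_states['DE-total'][-1]
    print(latest['Date'], latest['Cases'], latest['Deaths'])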