def test_clean_data_germany_goettingen_alt_is_fluke():
    """Check that filtering 'LK Göttingen (alt)' removes exactly one row.

    The German data set is fetched twice, once with and once without
    ``filter_goettingen_alt``, and the row counts are compared.

    Raises:
        ValueError: if more than one row was removed (the entry may be a
            real Landkreis and needs investigation), or if no row was
            removed (the cleaning code may no longer be needed).
        NotImplementedError: if the filtered data has more rows than the
            unfiltered data.
    """
    germany_data = c.fetch_data_germany(filter_goettingen_alt=False)
    cleaned = c.fetch_data_germany(filter_goettingen_alt=True)
    # Alternative way to obtain the cleaned frame:
    #   cleaned = c.clean_data_germany_remove_goettingen_alt(germany_data)

    # How many rows did the filter delete?
    n = len(germany_data) - len(cleaned)

    if n == 1:
        # Expected situation: a single stray row (see the docstring of
        # clean_data_germany_remove_goettingen_alt for the background).
        return

    if n > 1:
        msg = (
            f"We have found {n} rows of Göttingen alt data - please investigate\n"
            "if this is a real / important LK in Germany"
        )
        raise ValueError(msg, germany_data, cleaned)

    if n == 0:
        # Nothing was filtered out, so the data source no longer contains the
        # stray entry. Fail loudly so that the situation gets noticed.
        msg = (
            "There are no rows with LK Göttingen (alt). \n"
            "Consider removing the data cleaning code for Göttingen (alt)."
        )
        print(msg)
        raise ValueError(msg)

    # n < 0: filtering cannot add rows.
    raise NotImplementedError("This should not be possible.", germany_data, cleaned)
def generate_reports_germany(
    *,
    workers,
    kernel_name,
    wwwroot,
    force,
    disable_pbar,
    debug,
    incidence_period=7,
    incidence_threshold=50,
):
    """Build the HTML reports and markdown pages for all German regions.

    Regions whose Landkreis name contains "(alt)" are dropped (with a
    warning) before the reports are generated.

    Args:
        workers, kernel_name, wwwroot, force, disable_pbar: forwarded
            unchanged to ``ReportExecutor``.
        debug: forwarded to ``ReportExecutor``; when truthy only the first
            10 regions are processed.
        incidence_period: passed as ``period`` to
            ``create_markdown_incidence_page``.
        incidence_threshold: passed as ``threshold`` to
            ``create_markdown_incidence_page``.
    """
    # The result is discarded; presumably this call primes a data cache
    # before the reports run -- TODO confirm.
    _ = oscovida.fetch_data_germany()

    # TODO: The get_x_list methods should be part of Reporter class
    germany_regions = get_germany_regions_list()

    # Data cleaning: on 13 April there was a Landkreis "LK Göttingen (alt)"
    # with a single data point. With nothing to plot, the legend() call
    # failed. We assume the RKI marks unusual data with '(alt)' and remove
    # every such entry.
    is_alt = ["(alt)" in region[1].lower() for region in germany_regions]
    if any(is_alt):
        flagged = [
            region for region, alt in zip(germany_regions, is_alt) if alt
        ]
        logging.warning(
            f"Removing datasets label with '(alt)': {flagged}")
        for region in flagged:
            cases, deaths = oscovida.germany_get_region(landkreis=region[1])
            logging.warning(
                f"\tremoved: {region} : len(cases)={len(cases)}, len(deaths)={len(deaths)}")

        # Drop the flagged entries from the list in place.
        germany_regions[:] = [
            region for region, alt in zip(germany_regions, is_alt) if not alt
        ]

    executor = ReportExecutor(
        Reporter=GermanyReport,
        kernel_name=kernel_name,
        wwwroot=wwwroot,
        expiry_hours=2,
        attempts=3,
        workers=workers,
        force=force,
        disable_pbar=disable_pbar,
        debug=debug,
    )

    if debug:
        germany_regions = germany_regions[:10]

    executor.create_html_reports(germany_regions)
    executor.create_markdown_index_page()
    executor.create_markdown_incidence_page(
        period=incidence_period, threshold=incidence_threshold
    )
def get_germany_subregion_list():
    """Return the distinct Landkreis names as a list.

    The rows are sorted by Bundesland first and by Landkreis second before
    duplicate Landkreis names are removed, so the result is ordered by
    (i) Land, then (ii) Kreis.
    """
    sort_keys = ['Bundesland', 'Landkreis']
    sorted_pairs = fetch_data_germany()[sort_keys].sort_values(sort_keys)
    kreise = sorted_pairs['Landkreis'].drop_duplicates()
    return list(kreise)
def test_germany_get_population():
    """Sanity-check the table returned by ``c.germany_get_population()``.

    Verifies the index name, the presence of the expected columns, that the
    index covers exactly the Landkreise of the fetched German data, and
    lower bounds for the population of two known districts.

    NOTE(review): a later function in this file carries the same name and
    repeats these checks plus more; if both live in one module, the later
    definition replaces this one.
    """
    population_table = c.germany_get_population()

    assert population_table.index.name == 'county'
    for expected_column in ('population', 'cases7_per_100k'):
        assert expected_column in population_table.columns

    # Every Landkreis in the raw data must have a population entry, and
    # vice versa.
    raw = c.fetch_data_germany()
    assert set(raw['Landkreis']) == set(population_table.index)

    assert population_table.loc['SK Hamburg', 'population'] > 1800000
    assert population_table.loc['LK Pinneberg', 'population'] > 30000
def test_germany_get_population():
    """Sanity-check the table returned by ``c.germany_get_population()``.

    Verifies the index name, the presence of the expected columns, that the
    index covers exactly the Landkreise of the fetched German data, and
    lower bounds for the population of four known districts.

    NOTE(review): an earlier function in this file carries the same name
    with a subset of these checks; if both live in one module, this
    definition is the one that takes effect.
    """
    population_table = c.germany_get_population()

    assert population_table.index.name == 'county'
    for expected_column in ('population', 'cases7_per_100k'):
        assert expected_column in population_table.columns

    # Every Landkreis in the raw data must have a population entry, and
    # vice versa.
    raw = c.fetch_data_germany()
    assert set(raw['Landkreis']) == set(population_table.index)

    assert population_table.loc['SK Hamburg', 'population'] > 1800000
    assert population_table.loc['LK Pinneberg', 'population'] > 30000

    # https://github.com/oscovida/oscovida/issues/210
    assert population_table.loc['LK Saarpfalz-Kreis', 'population'] > 130000
    assert population_table.loc['StadtRegion Aachen', 'population'] > 500000
def germany_check_subregion__is_known(subregion):
    """Raise if ``subregion`` is not a Landkreis in the German data set.

    Args:
        subregion: value looked up among the distinct entries of the
            ``Landkreis`` column of ``oscovida.fetch_data_germany()``.

    Raises:
        AssertionError: if ``subregion`` does not occur in that column.
    """
    d = oscovida.fetch_data_germany()
    known_subregions = list(d["Landkreis"].drop_duplicates())
    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed under `python -O`, which would silently disable this check.
    # The exception type and message are the same as before.
    if subregion not in known_subregions:
        raise AssertionError(f"{subregion} is unknown.")
def germany_check_region_is_known(region):
    """Raise if ``region`` is not a Bundesland in the German data set.

    Args:
        region: value looked up among the distinct entries of the
            ``Bundesland`` column of ``oscovida.fetch_data_germany()``.

    Raises:
        AssertionError: if ``region`` does not occur in that column.
    """
    d = oscovida.fetch_data_germany()
    known_regions = list(d["Bundesland"].drop_duplicates())
    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed under `python -O`, which would silently disable this check.
    # The exception type and message are the same as before.
    if region not in known_regions:
        raise AssertionError(f"{region} is unknown.")
def get_germany_regions_list():
    """Return the distinct (Bundesland, Landkreis) pairs as a list of lists.

    The pairs are sorted by Bundesland first and Landkreis second; each
    element of the result is a two-item list ``[Bundesland, Landkreis]``.
    """
    pair_columns = ["Bundesland", "Landkreis"]
    pairs = oscovida.fetch_data_germany()[pair_columns]
    unique_sorted = pairs.sort_values(pair_columns).drop_duplicates()
    return unique_sorted.values.tolist()