def get_statistics_description(selected_stats=selected_stats):
    '''
    Return the description of the statistic named by `selected_stats`.

    The statistic is added as a field to an all-regions query (parent '06')
    and the description is read from the resulting field object.
    '''
    region_query = Query.all_regions(parent='06')
    return region_query.add_field(selected_stats).description()
def get_data_all_years(selected_stats=selected_stats, norm=0, pop=pop):
    '''
    For a given `selected_stats` returns data and unit, where
        data [dataframe]: the chosen statistics from datenguide.py with
            columns 'name', 'year', 'id' and stat values
        unit [string]: name of the corresponding unit (taken from the first row)

    Input:
        selected_stats [string]: name of the statistic field to query
        norm [int]: 0 if no normalization requested, else the values are
            divided by the population column 'BEVSTD' and multiplied by `norm`
        pop [dataframe]: dataframe with the population of all Hesse regions;
            it must contain a 'BEVSTD' column and share at least one column
            with the query result so that the two can be merged

    Raises:
        RuntimeError: if the query still fails after three attempts; the
            last underlying error is attached as the cause
    '''
    last_error = None
    for _ in range(3):  # Database may throw an error on the first query try
        try:
            q = Query.all_regions(parent='06')
            q.add_field(selected_stats)
            data = q.results(verbose_enums=True, add_units=True)
            break
        except Exception as err:
            # Only retry on ordinary errors; KeyboardInterrupt/SystemExit
            # must still propagate immediately.
            last_error = err
    else:
        # Without this, a persistent failure would surface as an unrelated
        # "variable referenced before assignment" error further down.
        raise RuntimeError(
            "query for '{}' failed after 3 attempts".format(selected_stats)
        ) from last_error

    # for some reason entries are produced twice; remove them
    data = data.drop_duplicates()

    unit = data[selected_stats + '_unit'].iloc[0]
    data = data[['name', 'year', 'id', selected_stats]]

    if norm != 0:
        # Normalize the statistic by population: merge on the columns both
        # frames share, then scale by `norm`.
        df = pd.merge(data, pop, how='left')
        # Single-column assignment; assigning a Series to a one-element
        # column list is rejected by recent pandas versions.
        df[selected_stats] = df[selected_stats] / df['BEVSTD'] * norm
        data = df[['name', 'year', 'id', selected_stats]]

    return data, unit
def get_population_all_years():
    '''
    Returns
        pop [dataframe]: the population for all Hesse regions from
            datenguide.py with columns 'year', 'id' and 'BEVSTD'

    Raises:
        RuntimeError: if the query still fails after three attempts; the
            last underlying error is attached as the cause
    '''
    selected_stats = 'BEVSTD'   # population count ("Bevölkerungsstand")
    selected_stats1 = 'R12411'  # updated population count ("Fortschreibung des Bevölkerungsstandes")

    last_error = None
    for _ in range(3):  # Database may throw an error on the first query try
        try:
            q = Query.all_regions(parent='06')
            stat = q.add_field(selected_stats)
            # One more level in this stat (exact source of the stat)
            stat.add_args({'statistics': selected_stats1})
            pop = q.results(verbose_enums=True, add_units=True)
            break
        except Exception as err:
            # Only retry on ordinary errors; KeyboardInterrupt/SystemExit
            # must still propagate immediately.
            last_error = err
    else:
        # Without this, a persistent failure would surface as an unrelated
        # "variable referenced before assignment" error further down.
        raise RuntimeError(
            "population query failed after 3 attempts"
        ) from last_error

    # for some reason entries are produced twice; remove them
    pop = pop.drop_duplicates()
    return pop[['year', 'id', selected_stats]]
def test_lau(field):
    """Check the GraphQL text generated for an all-regions query with `lau=3`."""
    lau_query = Query.all_regions(
        parent="11",
        lau=3,
        fields=["id", "name", field],
        default_fields=False,
    )
    generated = lau_query.get_graphql_query()[0]

    # Flatten the generated query: newlines become spaces, then runs of
    # spaces are squeezed to a single space.
    flattened = re.sub(" +", " ", generated.replace("\n", " "))

    # Strip any newline-plus-indentation sequences from the expected text.
    expected = re.sub(
        r"\n\s+",
        "",
        """query ($page : Int, $itemsPerPage : Int) { allRegions (page: $page, itemsPerPage: $itemsPerPage){ regions (parent: "11", lau: 3){ id name WAHL09 (year: 2017){value year PART04 } } page itemsPerPage total } }""",
    )

    assert flattened == expected
def test_add_fields_all_regions():
    """Check the GraphQL text after adding field "BEV001" to an all-regions query."""
    regions_query = Query.all_regions(parent="11")
    regions_query.add_field("BEV001")
    generated = regions_query.get_graphql_query()[0]

    # Strip any newline-plus-indentation sequences from the expected text.
    expected = re.sub(
        r"\n\s+",
        "",
        """query ($page : Int, $itemsPerPage : Int) { allRegions (page: $page, itemsPerPage: $itemsPerPage){ regions (parent: "11"){ id name BEV001 { year value source {title_de valid_from periodicity name url }} } page itemsPerPage total } }""",
    )

    assert generated == expected
def all_regions_query(field):
    """Build an all-regions query (parent "11") requesting id, name and `field` only."""
    requested_fields = ["id", "name", field]
    return Query.all_regions(
        parent="11",
        fields=requested_fields,
        default_fields=False,
    )
def test_get_field_info():
    """The info for field "BEV001" should list "BEVM01" among its fields."""
    regions_query = Query.all_regions()
    field_info = regions_query.get_info("BEV001")
    assert "BEVM01" in field_info.fields
def test_get_all_stats_info():
    """The info requested without a field name should list "name" among its fields."""
    regions_query = Query.all_regions()
    overall_info = regions_query.get_info()
    assert "name" in overall_info.fields