def test_add_args_stepwise(): query = Query.region(region="11") statistic1 = query.add_field("BEV001") statistic1.add_field("year") statistic1.add_args({"year": 2017}) query2 = Query.region( region="11", fields=[Field(name="BEV001", args={"year": 2017}, fields=["year"])] ) assert query.get_graphql_query() == query2.get_graphql_query()
def test_add_fields_stepwise(patch_return_types): query = Query.region(region="11", default_fields=False) statistic1 = query.add_field("BEV001", default_fields=False) statistic1.add_field("year") statistic2 = Field( name="WAHL09", args={ "year": 2017, "PART04": "ALL" }, fields=["value", "PART04"], default_fields=False, return_type="WAHL09", ) query.add_field(statistic2) query2 = Query.region( region="11", fields=[ Field( name="BEV001", fields=["year"], default_fields=False, return_type="BEV001", ), Field( name="WAHL09", args={ "year": 2017, "PART04": "ALL" }, fields=["value", "PART04"], default_fields=False, return_type="WAHL09", ), ], default_fields=False, ) assert query.get_graphql_query() == query2.get_graphql_query() graphql_query = query.get_graphql_query() assert graphql_query[0] == re.sub( " ", "", """{ region (id: "11"){ BEV001 {year } WAHL09 (year: 2017, filter:{ PART04: { nin: []}}){value PART04 } } }""".replace("\n", ""), )
def get_statistics_description(selected_stats=selected_stats): ''' get the description string for statistics 'selected_stats' ''' q = Query.all_regions(parent='06') stat = q.add_field(selected_stats) return stat.description()
def get_data_all_years(selected_stats=selected_stats, norm=0, pop=pop): ''' For a given `selected_stats` returns data and unit, where data [dataframe]: the chosen statistics from datenguide.py with columns 'name', 'year', 'id' and stat values units [string]: name of corresponding unit Input: norm [int]: 0 if no normalization requested, else normalization by population and multiplied by factor norm pop [datafram]: dataframe with the population of all Hesse regions ''' for _ in range(3): # Database may throw an error on the first query try try: q = Query.all_regions(parent='06') stat = q.add_field(selected_stats) data = q.results(verbose_enums=True, add_units=True) break except: continue # for some reason entries are produced twice; remove them data.drop_duplicates(inplace=True) data, unit = data[['name', 'year', 'id', selected_stats]], data[selected_stats + '_unit'].iloc[0] if norm != 0: # Normalize dataframe data here by modifying selected_stats with the population dataframe df = pd.merge(data, pop, how='left') df[[selected_stats]] = df[selected_stats] / df['BEVSTD'] * norm data = df[['name', 'year', 'id', selected_stats]] return data, unit
def get_population_all_years(): ''' Returns pop [dataframe]: the population for all Hesse regions from datenguide.py with columns 'name', 'year', 'id' and stat values ''' selected_stats = 'BEVSTD' # Bevölkerungsstand (population statistic) selected_stats1 = 'R12411' # Fortschreibung des Bevölkerungsstandes (forward projection of populatin statistic) for _ in range(3): # Database may throw an error on the first query try try: q = Query.all_regions(parent='06') stat = q.add_field(selected_stats) stat.add_args({ 'statistics': selected_stats1 }) # One more level in this stat (exact source of the stat) pop = q.results(verbose_enums=True, add_units=True) break except: continue # for some reason entries are produced twice; remove them pop.drop_duplicates(inplace=True) pop = pop[['year', 'id', selected_stats]] return pop
def query_with_enum(patch_return_types): q = Query.region("09", default_fields=False) stat = q.add_field("WAHL09") stat.add_field("PART04") stat.add_field("year") stat.add_field("value") return q
def test_multiple_regions_query(): query = Query.region(region=["01", "02"], fields=["BEV001"]) graphql_query = query.get_graphql_query() assert len(graphql_query) == 2, "wrong amount of query strings" assert graphql_query[1].startswith( '{region (id: "02")' ), "not properly iterated over all regions"
def get_chart(): topic = session.get('topic') myid = session.get('myid') description = "short_description.str.contains('" + topic + "')" table = get_statistics().query(description, engine='python') q = Query.region(myid) field = table.iloc[0] field = field.name f1 = q.add_field(field) results = q.results() df = results.set_index('year') # Save df as csv df.to_csv('downloads/data.csv', sep='\t') fig = Figure() axis = fig.add_subplot(1, 1, 1) xs = x = df.index ys = y = df[field] axis.plot(xs, ys, linestyle='--', marker='o', color='b') axis.set_xlabel('Time') axis.set_ylabel("\n".join(wrap(topic + " in " + session.get('city'), 60))) axis.xaxis.set_major_locator(MaxNLocator(integer=True)) fig.tight_layout() return fig
def get_chart_map(): # this is calling the chart try: topic = session.get('topic') regions = get_regions().query("level == 'nuts3'") cities = regions.query( '(parent == "091") | (parent == "092") | (parent == "093") | (parent == "094") | (parent == "095") | (parent == "096") | (parent == "097")' ) # get multiple regions q = Query.region(list(cities.index)) description = "short_description.str.contains('" + topic + "')" table = get_statistics().query(description, engine='python') field = table.iloc[0] field = field.name q.add_field(field) results_nuts3 = q.results() # read in shps shp_nuts2 = gpd.read_file("shp/bavaria_nuts2") max_year = max(results_nuts3["year"]) results_nuts3_lastyear = results_nuts3[results_nuts3["year"] == max_year] # prep for merging results_nuts3_lastyear = results_nuts3_lastyear.drop_duplicates() # test if df is empty row = results_nuts3_lastyear.iloc[4] emptytest = row.iloc[4] if (len(emptytest) != 0): results_nuts3_lastyear.loc[:, "name2"] = results_nuts3_lastyear[ "name"].str.replace(", Landkreis", "") results_nuts3_lastyear.loc[:, "name2"] = results_nuts3_lastyear[ "name2"].str.replace(", Landeshauptstadt", "") # merge datenguide data plot_data = shp_nuts2.merge(results_nuts3_lastyear, left_on="CC_2", right_on="id") # plot fig = Figure() axis = fig.add_subplot(1, 1, 1) axis = plot_data.plot(column=field, legend=True, ax=axis) fig.suptitle(topic + " in " + str(max_year)) axis.set_axis_off() # return fig.get_figure() return fig except Exception as e: app.logger.error('an error occurred during the creation of the map:', e)
def test_get_complex_graphql_string_without_args(patch_return_types): field = Field(name="WAHL09", fields=["value"], default_fields=False) no_args_query = Query.region(region="09", fields=["id", "name", field], default_fields=False) graphql_query = no_args_query.get_graphql_query() assert graphql_query[0] == re.sub( " ", "", """{ region (id: "09"){ id name WAHL09 {value }} }""".replace("\n", ""), )
def test_lau(field): query = Query.all_regions( parent="11", lau=3, fields=["id", "name", field], default_fields=False ) graphql_query = query.get_graphql_query()[0] assert re.sub(" +", " ", graphql_query.replace("\n", " ")) == re.sub( r"\n\s+", "", """query ($page : Int, $itemsPerPage : Int) { allRegions (page: $page, itemsPerPage: $itemsPerPage){ regions (parent: "11", lau: 3){ id name WAHL09 (year: 2017){value year PART04 } } page itemsPerPage total } }""", )
def test_add_fields_all_regions(): all_reg_query = Query.all_regions(parent="11") all_reg_query.add_field("BEV001") graphql_query = all_reg_query.get_graphql_query()[0] assert graphql_query == re.sub( r"\n\s+", "", """query ($page : Int, $itemsPerPage : Int) { allRegions (page: $page, itemsPerPage: $itemsPerPage){ regions (parent: "11"){ id name BEV001 { year value source {title_de valid_from periodicity name url }} } page itemsPerPage total } }""", )
def test_multiple_filter_args(patch_return_types): statistic1 = Field( name="BETR09", args={"FRUNW2": ["FRUART0111", "FRUART0112"]}, fields=["FRUNW2", "value", "year"], default_fields=False, ) multiple_args_query = Query.region(region="02", fields=[statistic1], default_fields=False) graphql_query = multiple_args_query.get_graphql_query() assert graphql_query[0] == re.sub( " ", "", """{ region (id: "02"){ BETR09 (FRUNW2: [FRUART0111, FRUART0112]){FRUNW2 value year } } }""".replace("\n", ""), )
def test_filter_for_all(query): field = Field( name="WAHL09", args={"year": 2017, "PART04": "ALL"}, fields=["value", "PART04"], default_fields=False, return_type="WAHL09", ) query = Query.region(region="09", fields=["id", "name", field]) graphql_query = query.get_graphql_query() assert graphql_query[0] == re.sub( " ", "", """{ region (id: "09"){ id name WAHL09 ( year: 2017, filter:{ PART04: { nin: []}}){ value PART04 } } }""".replace( "\n", "" ), )
#from datenguidepy.query_helper import get_regions, get_statistics, get_availability_summary from datenguidepy import Query import pandas as pd import matplotlib.pyplot as plt ## python info-stats #print(r[(r['name']=='Berlin')]) #print(r[r['name'].str.contains("Berlin|Hamburg|München|Köln|Frankfurt|Stuttgart|Düsseldorf|Dortmund|Essen|Leipzig|Bremen|Dresden")]) #stats_names = get_statistics() # Availability #availability = get_availability_summary() #print(availability.shape) ## Query Statistics Accidents + some info on cities (pop, dens, cars) q = Query.region(['11000', '14612', '14713', '02000', '04011', '05111', '05113', '05315', '05913', '06412', '08111', '09162']) #stat = q.add_field("VER056") #stat.get_info() q.add_field('BEV016') ## Einwohner q.add_field('AI0201') ## Bevölkerungsdichte (Einwohner je qkm) q.add_field('AI0113') ## Siedlungs- und Verkehrsfläche je Einwohner q.add_field('AI1301') ## Pkw je 1.000 Einwohner q.add_field('AI1302') ## Straßenverkehrsunfälle je 10.000 Einwohner q.add_field('AI1303') ## Straßenverkehrsunfälle je 10.000 Kfz q.add_field('AI1304') ## Getötete bei Straßenverkehrsunfällen je 100.000 EW q.add_field('AI1305') ## Verletzte bei Straßenverkehrsunfällen je 100.000EW q.add_field('VER001') ## Unfälle (insgesamt) q.add_field('VER002') ## Unfälle mit Personenschaden q.add_field('VER005') ## Schwerwiegende Unfälle mit Sachschäden i.e.S.
def query(): return Query.region(region="09", fields=["BEV001"], default_fields=False)
def query_default(): return Query.region(region="09", fields=["BEV001"])
# get short names for statistics stats_names = get_statistics() print(stats_names.head(10)) # Availability availability = get_availability_summary() print(availability.shape) # Availability for Berlin availability_berlin = availability[availability['region_name'] == 'Berlin'] print(availability_berlin.shape) # 1133 statistics print(availability_berlin.head(20)) # query region Berlin (id == '11') q = Query.region('11') # add fields #q.add_field('BEV001') # Statistik der Geburten #q.add_field('BEV002') # Statistik der Sterbefaelle #q.add_field('ERWP06') # Arbeitsmarktstatistik der Bundesagentur für Arbeit #q.add_field('FLC047') # Agrarstrukturerhebung / Landwirtschaftszählung #q.add_field('PERS01') # Statistik der öffentlich geförderten Kindertagespflege #q.add_field('FLCX05') # Flächenerhebung nach Art der tatsächlichen Nutzung q.add_field('BEV081') # Wanderungsstatistik Zuzuege q.add_field('BEV082') # Wanderungsstatistik Fortzuege # get the results as a Pandas DataFrame df_results = q.results(add_units=True) # print column names
def complex_query(field): return Query.region(region="09", fields=["id", "name", field], default_fields=False)
def test_get_field_info(): info = Query.get_info("BEV001") assert "BEVM01" in info.fields
def query_default(patch_return_types): return Query.region(region="09", fields=["BEV001"])
def all_regions_query(field): return Query.all_regions( parent="11", fields=["id", "name", field], default_fields=False )
def test_get_all_stats_info(): info = Query.get_info() assert "name" in info.fields
def test_missing_statistic_field_raises_exception(): q = Query.region("09162000") with pytest.raises(Exception, match=r"add .* field"): q.results()
def query(patch_return_types): return Query.region(region="09", fields=["BEV001"], default_fields=False)
def test_invalid_region_name_raises_exception(): q = Query.region("jodelverein") q.add_field("BEV001") with pytest.raises(ValueError, match=r"region is invalid."): q.results()
def test_create_query_class_without_start_filed_raises_error(): with pytest.raises(TypeError): Query()
def test_get_field_info(): info = Query.all_regions().get_info("BEV001") assert "BEVM01" in info.fields