예제 #1
0
def test_add_args_stepwise():
    query = Query.region(region="11")
    statistic1 = query.add_field("BEV001")
    statistic1.add_field("year")
    statistic1.add_args({"year": 2017})

    query2 = Query.region(
        region="11", fields=[Field(name="BEV001", args={"year": 2017}, fields=["year"])]
    )

    assert query.get_graphql_query() == query2.get_graphql_query()
예제 #2
0
def test_add_fields_stepwise(patch_return_types):
    query = Query.region(region="11", default_fields=False)
    statistic1 = query.add_field("BEV001", default_fields=False)
    statistic1.add_field("year")
    statistic2 = Field(
        name="WAHL09",
        args={
            "year": 2017,
            "PART04": "ALL"
        },
        fields=["value", "PART04"],
        default_fields=False,
        return_type="WAHL09",
    )
    query.add_field(statistic2)

    query2 = Query.region(
        region="11",
        fields=[
            Field(
                name="BEV001",
                fields=["year"],
                default_fields=False,
                return_type="BEV001",
            ),
            Field(
                name="WAHL09",
                args={
                    "year": 2017,
                    "PART04": "ALL"
                },
                fields=["value", "PART04"],
                default_fields=False,
                return_type="WAHL09",
            ),
        ],
        default_fields=False,
    )
    assert query.get_graphql_query() == query2.get_graphql_query()

    graphql_query = query.get_graphql_query()
    assert graphql_query[0] == re.sub(
        "    ",
        "",
        """{
                region (id: "11"){
                    BEV001 {year }
                    WAHL09 (year: 2017, filter:{ PART04: { nin: []}}){value PART04 }
                }
        }""".replace("\n", ""),
    )
def get_statistics_description(selected_stats=selected_stats):
    '''
    get the description string for statistics 'selected_stats'
    '''
    q = Query.all_regions(parent='06')
    stat = q.add_field(selected_stats)
    return stat.description()
def get_data_all_years(selected_stats=selected_stats, norm=0, pop=pop):
    '''
    For a given `selected_stats` returns data and unit, where
    data [dataframe]: the chosen statistics from datenguide.py with columns 'name', 'year', 'id' and stat values
    units [string]: name of corresponding unit
    
    Input:
    norm [int]: 0 if no normalization requested, else normalization by population and multiplied by factor norm
    pop [datafram]: dataframe with the population of all Hesse regions
    '''

    for _ in range(3):  # Database may throw an error on the first query try
        try:
            q = Query.all_regions(parent='06')
            stat = q.add_field(selected_stats)

            data = q.results(verbose_enums=True, add_units=True)
            break
        except:
            continue

    # for some reason entries are produced twice; remove them
    data.drop_duplicates(inplace=True)

    data, unit = data[['name', 'year', 'id',
                       selected_stats]], data[selected_stats + '_unit'].iloc[0]

    if norm != 0:
        # Normalize dataframe data here by modifying selected_stats with the population dataframe
        df = pd.merge(data, pop, how='left')
        df[[selected_stats]] = df[selected_stats] / df['BEVSTD'] * norm

        data = df[['name', 'year', 'id', selected_stats]]

    return data, unit
def get_population_all_years():
    '''
    Returns pop [dataframe]: the population for all Hesse regions from datenguide.py with columns 'name', 'year', 'id' and stat values
    '''
    selected_stats = 'BEVSTD'  # Bevölkerungsstand (population statistic)
    selected_stats1 = 'R12411'  # Fortschreibung des Bevölkerungsstandes (forward projection of populatin statistic)

    for _ in range(3):  # Database may throw an error on the first query try
        try:
            q = Query.all_regions(parent='06')
            stat = q.add_field(selected_stats)
            stat.add_args({
                'statistics': selected_stats1
            })  # One more level in this stat (exact source of the stat)

            pop = q.results(verbose_enums=True, add_units=True)
            break
        except:
            continue

    # for some reason entries are produced twice; remove them
    pop.drop_duplicates(inplace=True)

    pop = pop[['year', 'id', selected_stats]]

    return pop
def query_with_enum(patch_return_types):
    q = Query.region("09", default_fields=False)
    stat = q.add_field("WAHL09")
    stat.add_field("PART04")
    stat.add_field("year")
    stat.add_field("value")
    return q
예제 #7
0
def test_multiple_regions_query():
    query = Query.region(region=["01", "02"], fields=["BEV001"])
    graphql_query = query.get_graphql_query()
    assert len(graphql_query) == 2, "wrong amount of query strings"
    assert graphql_query[1].startswith(
        '{region (id: "02")'
    ), "not properly iterated over all regions"
예제 #8
0
def get_chart():
    topic = session.get('topic')
    myid = session.get('myid')
    description = "short_description.str.contains('" + topic + "')"
    table = get_statistics().query(description, engine='python')
    q = Query.region(myid)
    field = table.iloc[0]
    field = field.name
    f1 = q.add_field(field)
    results = q.results()
    df = results.set_index('year')
    # Save df as csv
    df.to_csv('downloads/data.csv', sep='\t')

    fig = Figure()
    axis = fig.add_subplot(1, 1, 1)
    xs = x = df.index
    ys = y = df[field]

    axis.plot(xs, ys, linestyle='--', marker='o', color='b')

    axis.set_xlabel('Time')
    axis.set_ylabel("\n".join(wrap(topic + " in " + session.get('city'), 60)))
    axis.xaxis.set_major_locator(MaxNLocator(integer=True))
    fig.tight_layout()

    return fig
예제 #9
0
def get_chart_map():  # this is calling the chart
    try:
        topic = session.get('topic')
        regions = get_regions().query("level == 'nuts3'")
        cities = regions.query(
            '(parent == "091") | (parent == "092") | (parent == "093") | (parent == "094") | (parent == "095") | (parent == "096") | (parent == "097")'
        )

        # get multiple regions
        q = Query.region(list(cities.index))

        description = "short_description.str.contains('" + topic + "')"
        table = get_statistics().query(description, engine='python')

        field = table.iloc[0]
        field = field.name
        q.add_field(field)
        results_nuts3 = q.results()

        # read in shps
        shp_nuts2 = gpd.read_file("shp/bavaria_nuts2")
        max_year = max(results_nuts3["year"])
        results_nuts3_lastyear = results_nuts3[results_nuts3["year"] ==
                                               max_year]

        # prep for merging
        results_nuts3_lastyear = results_nuts3_lastyear.drop_duplicates()
        # test if df is empty
        row = results_nuts3_lastyear.iloc[4]
        emptytest = row.iloc[4]
        if (len(emptytest) != 0):
            results_nuts3_lastyear.loc[:, "name2"] = results_nuts3_lastyear[
                "name"].str.replace(", Landkreis", "")
            results_nuts3_lastyear.loc[:, "name2"] = results_nuts3_lastyear[
                "name2"].str.replace(", Landeshauptstadt", "")

            # merge datenguide data
            plot_data = shp_nuts2.merge(results_nuts3_lastyear,
                                        left_on="CC_2",
                                        right_on="id")

            # plot
            fig = Figure()
            axis = fig.add_subplot(1, 1, 1)

            axis = plot_data.plot(column=field, legend=True, ax=axis)
            fig.suptitle(topic + " in " + str(max_year))
            axis.set_axis_off()

            # return fig.get_figure()
            return fig
    except Exception as e:
        app.logger.error('an error occurred during the creation of the map:',
                         e)
예제 #10
0
def test_get_complex_graphql_string_without_args(patch_return_types):
    field = Field(name="WAHL09", fields=["value"], default_fields=False)
    no_args_query = Query.region(region="09",
                                 fields=["id", "name", field],
                                 default_fields=False)

    graphql_query = no_args_query.get_graphql_query()
    assert graphql_query[0] == re.sub(
        "    ",
        "",
        """{
        region (id: "09"){
            id name WAHL09 {value }}
            }""".replace("\n", ""),
    )
예제 #11
0
def test_lau(field):
    query = Query.all_regions(
        parent="11", lau=3, fields=["id", "name", field], default_fields=False
    )
    graphql_query = query.get_graphql_query()[0]
    assert re.sub(" +", " ", graphql_query.replace("\n", " ")) == re.sub(
        r"\n\s+",
        "",
        """query ($page : Int, $itemsPerPage : Int) {
            allRegions (page: $page, itemsPerPage: $itemsPerPage){
                regions (parent: "11", lau: 3){
                    id name WAHL09 (year: 2017){value year PART04 }
                }
                page itemsPerPage total }
        }""",
    )
예제 #12
0
def test_add_fields_all_regions():
    all_reg_query = Query.all_regions(parent="11")
    all_reg_query.add_field("BEV001")

    graphql_query = all_reg_query.get_graphql_query()[0]
    assert graphql_query == re.sub(
        r"\n\s+",
        "",
        """query ($page : Int, $itemsPerPage : Int) {
            allRegions (page: $page, itemsPerPage: $itemsPerPage){
                regions (parent: "11"){
                    id name BEV001 {
                        year value source {title_de valid_from periodicity name url }}
                }
                page itemsPerPage total }
        }""",
    )
예제 #13
0
def test_multiple_filter_args(patch_return_types):
    statistic1 = Field(
        name="BETR09",
        args={"FRUNW2": ["FRUART0111", "FRUART0112"]},
        fields=["FRUNW2", "value", "year"],
        default_fields=False,
    )
    multiple_args_query = Query.region(region="02",
                                       fields=[statistic1],
                                       default_fields=False)

    graphql_query = multiple_args_query.get_graphql_query()
    assert graphql_query[0] == re.sub(
        "    ",
        "",
        """{
            region (id: "02"){
                BETR09 (FRUNW2: [FRUART0111, FRUART0112]){FRUNW2 value year }
                }
            }""".replace("\n", ""),
    )
예제 #14
0
def test_filter_for_all(query):
    field = Field(
        name="WAHL09",
        args={"year": 2017, "PART04": "ALL"},
        fields=["value", "PART04"],
        default_fields=False,
        return_type="WAHL09",
    )
    query = Query.region(region="09", fields=["id", "name", field])
    graphql_query = query.get_graphql_query()
    assert graphql_query[0] == re.sub(
        "    ",
        "",
        """{
            region (id: "09"){
                id name WAHL09 (
                    year: 2017, filter:{ PART04: { nin: []}}){
                        value PART04 }
                }
        }""".replace(
            "\n", ""
        ),
    )
예제 #15
0
#from datenguidepy.query_helper import get_regions, get_statistics, get_availability_summary
from datenguidepy import Query
import pandas as pd
import matplotlib.pyplot as plt

## python info-stats
#print(r[(r['name']=='Berlin')])
#print(r[r['name'].str.contains("Berlin|Hamburg|München|Köln|Frankfurt|Stuttgart|Düsseldorf|Dortmund|Essen|Leipzig|Bremen|Dresden")])
#stats_names = get_statistics()

# Availability
#availability = get_availability_summary()
#print(availability.shape)

## Query Statistics Accidents + some info on cities (pop, dens, cars)
q = Query.region(['11000', '14612', '14713', '02000', '04011', '05111', '05113', '05315', '05913', '06412', '08111', '09162'])

#stat = q.add_field("VER056")
#stat.get_info()

q.add_field('BEV016')  ## Einwohner
q.add_field('AI0201')  ## Bevölkerungsdichte (Einwohner je qkm)
q.add_field('AI0113')	 ## Siedlungs- und Verkehrsfläche je Einwohner
q.add_field('AI1301')  ## Pkw je 1.000 Einwohner
q.add_field('AI1302')  ## Straßenverkehrsunfälle je 10.000 Einwohner
q.add_field('AI1303')  ## Straßenverkehrsunfälle je 10.000 Kfz
q.add_field('AI1304')  ## Getötete bei Straßenverkehrsunfällen je 100.000 EW
q.add_field('AI1305')  ## Verletzte bei Straßenverkehrsunfällen je 100.000EW
q.add_field('VER001')  ## Unfälle (insgesamt)
q.add_field('VER002')  ## Unfälle mit Personenschaden
q.add_field('VER005')  ## Schwerwiegende Unfälle mit Sachschäden i.e.S.
예제 #16
0
def query():
    return Query.region(region="09", fields=["BEV001"], default_fields=False)
예제 #17
0
def query_default():
    return Query.region(region="09", fields=["BEV001"])
예제 #18
0
# get short names for statistics
stats_names = get_statistics()
print(stats_names.head(10))

# Availability
availability = get_availability_summary()
print(availability.shape)

# Availability for Berlin
availability_berlin = availability[availability['region_name'] == 'Berlin']
print(availability_berlin.shape)  # 1133 statistics
print(availability_berlin.head(20))

# query region Berlin (id == '11')
q = Query.region('11')

# add fields
#q.add_field('BEV001') # Statistik der Geburten
#q.add_field('BEV002') # Statistik der Sterbefaelle
#q.add_field('ERWP06') # Arbeitsmarktstatistik der Bundesagentur für Arbeit
#q.add_field('FLC047') # Agrarstrukturerhebung / Landwirtschaftszählung
#q.add_field('PERS01') # Statistik der öffentlich geförderten Kindertagespflege
#q.add_field('FLCX05') # Flächenerhebung nach Art der tatsächlichen Nutzung
q.add_field('BEV081')  # Wanderungsstatistik Zuzuege
q.add_field('BEV082')  # Wanderungsstatistik Fortzuege

# get the results as a Pandas DataFrame
df_results = q.results(add_units=True)

# print column names
예제 #19
0
def complex_query(field):
    return Query.region(region="09", fields=["id", "name", field], default_fields=False)
예제 #20
0
def test_get_field_info():
    info = Query.get_info("BEV001")
    assert "BEVM01" in info.fields
def query_default(patch_return_types):
    return Query.region(region="09", fields=["BEV001"])
예제 #22
0
def all_regions_query(field):
    return Query.all_regions(
        parent="11", fields=["id", "name", field], default_fields=False
    )
예제 #23
0
def test_get_all_stats_info():
    info = Query.get_info()
    assert "name" in info.fields
def test_missing_statistic_field_raises_exception():
    q = Query.region("09162000")
    with pytest.raises(Exception, match=r"add .* field"):
        q.results()
def query(patch_return_types):
    return Query.region(region="09", fields=["BEV001"], default_fields=False)
def test_invalid_region_name_raises_exception():
    q = Query.region("jodelverein")
    q.add_field("BEV001")
    with pytest.raises(ValueError, match=r"region is invalid."):
        q.results()
예제 #27
0
def test_create_query_class_without_start_filed_raises_error():
    with pytest.raises(TypeError):
        Query()
예제 #28
0
def test_get_field_info():
    info = Query.all_regions().get_info("BEV001")
    assert "BEVM01" in info.fields