def test_query_helper():
    """Walk through the helpers for looking up region and statistic ids."""
    # Ira likes what the package offers so far, but finding the right
    # technical ids for regions and statistics still looks difficult.

    # There is a helper that exposes the federal states under human
    # readable names, so Ira tries it out for Berlin.
    berlin_id = federal_states.Berlin
    assert berlin_id == "11"

    # Federal states are only a small part of all regions. To search
    # through every region, Ira asks for the full overview and expects
    # to get a DataFrame back.
    region_overview = get_regions()
    assert isinstance(region_overview, pd.DataFrame)

    # The statistics should be available as the same kind of overview.
    statistic_overview = get_statistics()
    assert isinstance(statistic_overview, pd.DataFrame)

    # Browsing the complete table would probably do, but Ira read about
    # a basic built-in search and gives it a try.
    search_hits = get_statistics("scheidung")
    assert isinstance(search_hits, pd.DataFrame)
    hit_count = search_hits.shape[0]
    assert hit_count < 50
def test_translated_statistic_overview_table():
    """Requesting an English overview changes content but not shape."""
    original = get_statistics(
        stat_meta_data_provider=StatisticsSchemaJsonMetaDataProvider()
    )

    # Same call again, this time asking for a translation into English.
    translation_kwargs = {
        "stat_meta_data_provider": StatisticsSchemaJsonMetaDataProvider(),
        "target_language": "en",
        "translation_provider": SchemaTranslationProvider(),
    }
    translated = get_statistics(**translation_kwargs)

    # Both tables have the same dimensions ...
    assert original.shape == translated.shape
    # ... but they must not hold identical values.
    assert not original.equals(translated)
# Example #3
def get_topic(input):
    """Pick the statistic whose short description best matches ``input``.

    Keywords are extracted from ``input`` with ``get_hotwords`` and parsed
    with the module-level ``nlp`` pipeline.  Every pre-parsed description in
    ``desc`` is scored by taking, for each keyword, the highest similarity
    to any token of the description and averaging those maxima.  The row of
    ``get_statistics()`` at the position of the best score is selected.

    Side effect: an excerpt of at most 500 characters of the selected row's
    ``long_description`` is stored in ``session['info']``.

    Args:
        input: Text to match.  (The name shadows the builtin but is kept so
            that keyword callers continue to work.)

    Returns:
        The ``short_description`` of the selected statistic, or the string
        ``"False"`` if any step raises an exception.
    """
    import logging

    try:
        keywords = nlp(' '.join(get_hotwords(input)))

        # One score per description: mean over the keywords of the best
        # token similarity.  ``max`` raises ValueError for a description
        # without tokens, which ends up in the handler below.
        scores = [
            np.array([
                max(token.similarity(keyword) for token in doc)
                for keyword in keywords
            ]).mean()
            for doc in desc
        ]

        # NOTE(review): assumes ``desc`` is in the same row order as
        # ``get_statistics()`` -- confirm where ``desc`` is built.
        best_match = get_statistics().iloc[np.array(scores).argmax()]
        term = best_match['short_description']

        # Cut the long description down to a short excerpt: prefer the text
        # after the "===Aussage===" marker, stop at "Indikatorberechnung"
        # or the next "=", and cap the length at 500 characters.
        sections = best_match['long_description'].split("===Aussage===")
        info = sections[1] if len(sections) > 1 else sections[0]
        info = info.split("Indikatorberechnung")[0]
        info = info.split("=")[0]
        session['info'] = info[:500]
        return term
    except Exception:
        # Best effort by design: callers receive the "False" sentinel.
        # Unlike a bare ``except`` this lets SystemExit/KeyboardInterrupt
        # through, and the failure is logged instead of vanishing.
        logging.getLogger(__name__).exception("could not determine a topic")
        return "False"
# Example #4
def get_chart():
    """Build a line chart of the session's topic for the session's region.

    Reads ``topic``, ``myid`` and ``city`` from ``session``.  The first
    statistic whose ``short_description`` contains the topic is queried for
    region ``myid``; the result, indexed by ``year``, is written to
    ``downloads/data.csv`` (tab separated) and plotted over that index.

    Returns:
        The ``Figure`` holding the plot.

    Raises:
        IndexError: If no statistic description contains the topic.
    """
    topic = session.get('topic')
    region_id = session.get('myid')

    # Select with a boolean mask instead of a string-built ``query``
    # expression: the topic is matched as a literal substring, so quotes
    # or regex metacharacters in it cannot break or alter the lookup.
    stats = get_statistics()
    matches = stats[
        stats['short_description'].str.contains(topic, regex=False, na=False)
    ]
    # The index label of the first match is used as the query field name.
    field = matches.iloc[0].name

    query = Query.region(region_id)
    query.add_field(field)
    df = query.results().set_index('year')

    # Save df as csv
    df.to_csv('downloads/data.csv', sep='\t')

    fig = Figure()
    axis = fig.add_subplot(1, 1, 1)
    axis.plot(df.index, df[field], linestyle='--', marker='o', color='b')

    axis.set_xlabel('Time')
    # Wrap the label at 60 characters so long topics fit next to the axis.
    axis.set_ylabel("\n".join(wrap(topic + " in " + session.get('city'), 60)))
    # Only place ticks on integer positions of the x axis.
    axis.xaxis.set_major_locator(MaxNLocator(integer=True))
    fig.tight_layout()

    return fig
def test_statistic_overview_table():
    """Check the overview built with a StatisticsSchemaJsonMetaDataProvider."""
    provider = StatisticsSchemaJsonMetaDataProvider()
    overview = get_statistics(stat_meta_data_provider=provider)

    assert isinstance(overview, pd.DataFrame)
    # Statistic ids live in the index; the descriptions are the columns.
    assert overview.index.name == "statistic"
    expected_columns = ["short_description", "long_description"]
    assert list(overview.columns) == expected_columns
    row_count = overview.shape[0]
    assert row_count > 400
def test_statistic_overview_table():
    """Check type, column layout and minimum size of the default overview."""
    overview = get_statistics()
    assert isinstance(overview, pd.DataFrame)

    # Here the statistic ids are expected as a regular column.
    expected_columns = ["statistics", "short_description", "long_description"]
    assert list(overview.columns) == expected_columns

    row_count = overview.shape[0]
    assert row_count > 400
# Example #7
def test_queryHelper():
    """Walk through the region and statistic lookup helpers."""
    # Ira likes what the package offers so far, but finding the right
    # technical ids for regions and statistics still looks difficult.

    # There is a helper that exposes the federal states under human
    # readable names, so Ira tries it out for Berlin.
    berlin_id = federal_states.Berlin
    assert berlin_id == "11"

    # Federal states are only a small part of all regions. To search
    # through every region, Ira asks for the full overview and expects
    # to get a DataFrame back.
    stored_regions = get_all_regions()
    assert isinstance(stored_regions, pd.DataFrame)

    # The help says that this overview is a stored list and is not
    # fetched live from datenguide. Region definitions and ids rarely
    # change, but Ira still wants a way to get the most recent ones,
    # so he tries download_all_regions, which exists for this purpose.
    fresh_regions = download_all_regions()
    assert isinstance(fresh_regions, pd.DataFrame)
    expected_columns = ["name", "level", "parent"]
    assert list(fresh_regions.columns) == expected_columns
    assert fresh_regions.index.name == "id"
    region_count = fresh_regions.shape[0]
    assert region_count > 10000

    # The statistics should be available as the same kind of overview.
    statistic_overview = get_statistics()
    assert isinstance(statistic_overview, pd.DataFrame)

    # Browsing the complete table would probably do, but Ira read about
    # a basic built-in search and gives it a try.
    search_hits = get_statistics("scheidung")
    assert isinstance(search_hits, pd.DataFrame)
    hit_count = search_hits.shape[0]
    assert hit_count < 50
# Example #8
def get_chart_map():
    """Draw a map of the session's topic for the latest available year.

    The regions with level ``nuts3`` whose parent is one of "091" to "097"
    are queried for the first statistic whose ``short_description``
    contains ``session['topic']``.  The rows of the most recent year are
    merged with the shapes read from ``shp/bavaria_nuts2`` (shape column
    ``CC_2`` against result column ``id``) and plotted, coloured by the
    statistic's value.

    Returns:
        The ``Figure`` with the map, or ``None`` if the emptiness probe
        finds a zero-length value or if any exception is raised.  The
        exception is logged via ``app.logger`` and not re-raised.
    """
    try:
        topic = session.get('topic')
        regions = get_regions().query("level == 'nuts3'")
        parent_ids = ["091", "092", "093", "094", "095", "096", "097"]
        cities = regions[regions["parent"].isin(parent_ids)]

        # get multiple regions
        q = Query.region(list(cities.index))

        # Select with a boolean mask instead of a string-built ``query``
        # expression: the topic is matched as a literal substring, so
        # quotes or regex metacharacters in it cannot break the lookup.
        stats = get_statistics()
        matches = stats[
            stats['short_description'].str.contains(
                topic, regex=False, na=False)
        ]
        # The index label of the first match is used as the field name.
        field = matches.iloc[0].name
        q.add_field(field)
        results_nuts3 = q.results()

        # read in shps
        shp_nuts2 = gpd.read_file("shp/bavaria_nuts2")
        max_year = max(results_nuts3["year"])
        latest = results_nuts3[results_nuts3["year"] == max_year]

        # prep for merging
        latest = latest.drop_duplicates()

        # NOTE(review): the original "is the frame empty" test reads the
        # fifth column of the fifth row.  It raises (and is logged below)
        # for fewer than five rows or for a value without ``len()`` --
        # confirm that this is the intended check.
        probe = latest.iloc[4].iloc[4]
        if len(probe) == 0:
            return None

        # Strip the administrative suffixes from the region names.
        latest.loc[:, "name2"] = latest["name"].str.replace(
            ", Landkreis", "")
        latest.loc[:, "name2"] = latest["name2"].str.replace(
            ", Landeshauptstadt", "")

        # merge datenguide data
        plot_data = shp_nuts2.merge(latest, left_on="CC_2", right_on="id")

        # plot
        fig = Figure()
        axis = fig.add_subplot(1, 1, 1)
        axis = plot_data.plot(column=field, legend=True, ax=axis)
        fig.suptitle(topic + " in " + str(max_year))
        axis.set_axis_off()
        return fig
    except Exception as e:
        # The message needs a %s placeholder for the exception; without
        # it, logging cannot format the record and the message is lost.
        # ``exception`` also records the traceback.
        app.logger.exception(
            'an error occurred during the creation of the map: %s', e)
        return None
# Example #9
import io
from flask import Flask, render_template, request, session
from flask import send_file  # to download files
from string import punctuation
import logging
import geopandas as gpd
from textwrap import wrap
from matplotlib.ticker import MaxNLocator  # for integer values when plotting

import spacy
import numpy as np

# German spaCy pipeline used for all text comparisons in this module.
nlp = spacy.load("de_core_news_lg")

# Parse the short description of every statistic once at import time.
statistics = get_statistics().short_description.values.tolist()
desc = [nlp(name) for name in statistics]

# Regions two levels below region '09' (the NUTS-3 regions of Bavaria,
# according to the original comment): ``ids`` collects their index values
# and ``z`` their names, in matching order.
_all_regions = get_regions()  # loaded once instead of on every loop pass
bezirke = _all_regions.query("parent == '09'")
z = []
ids = []
for bezirk_id in bezirke.index.values.tolist():
    children = _all_regions.query("parent == '" + str(bezirk_id) + "'").name
    ids.extend(children.index.tolist())
    z.extend(children.values.tolist())

# transform names to nlp format for comparing