Ejemplo n.º 1
0
def query():
    """Build a single-region query for the BEVMK3 statistic.

    The query targets region "05911" and selects ``id``, ``name`` and the
    BEVMK3 field with its ``value`` and ``year`` subfields.

    :return: whatever ``Query.region`` returns for that selection
    """
    bevmk3 = Field(name="BEVMK3", fields=["value", "year"])
    return Query.region(region="05911", fields=["id", "name", bevmk3])
Ejemplo n.º 2
0
def query_multi_regions():
    """Build a BEVMK3 query spanning the regions "01" and "02".

    Selects ``id``, ``name`` and the BEVMK3 field with its ``value`` and
    ``year`` subfields for both regions.

    :return: whatever ``Query.region`` returns for that selection
    """
    region_ids = ["01", "02"]
    bevmk3 = Field(name="BEVMK3", fields=["value", "year"])
    return Query.region(region=region_ids, fields=["id", "name", bevmk3])
Ejemplo n.º 3
0
def test_QueryExecutionerWorkflow(query):
    """Functional test for the query executioner.

    Walks through a user story: check the default endpoint, run the supplied
    query, inspect its results and metadata, explore the schema information
    for BEVMK3, and finally add a field to the query and print its info.

    :param query: query object under test; it must provide ``results()``,
        ``meta_data()`` and ``add_field()`` (presumably supplied by the
        ``query`` fixture -- confirm against the test setup).
    """
    # Imported locally so this test does not rely on a file-level import.
    from contextlib import redirect_stdout

    # Ira (W. Cotton, probably the first person to use the term API) wants to
    # try the query execution part of the datenguide GraphQL wrapper library.
    # He already worked through the Query builder objects ready for execution.
    # He understands that he first has to create an executioner object and
    # that it uses the right endpoint by default, so that he does not have to
    # supply any parameters.

    qExec = QueryExecutioner()

    # After creating the object Ira is actually a little sceptical whether
    # the endpoint is correct, so he extracts the endpoint and compares it
    # with his expectations.

    assert (qExec.endpoint == "https://api-next.datengui.de/graphql"
            ), "Default endpoint is wrong"

    # Being satisfied that everything is set up with the correct endpoint Ira
    # now wants to execute one of his queries to see that he gets some return
    # values.

    res_query1 = query.results()

    assert res_query1 is not None, "query did not return results"

    # He wants to have a closer look at the query results and remembers that
    # they are returned as a DataFrame, so he checks the type.

    assert isinstance(
        res_query1, pd.DataFrame), "query results are not a pandas DataFrame"

    stats = Query.get_info()

    # Ira remembers that he read about the executioner's functionality to
    # return metadata along with the query results. So he wants to check
    # whether this metadata is actually present, and that it only contains
    # metadata related to his query.

    meta_query1 = query.meta_data()

    # In particular Ira would like to have a more human readable description
    # of the statistic he asked for.

    assert "BEVMK3" in meta_query1, "statistic absend"
    assert (meta_query1["BEVMK3"] !=
            "NO DESCRIPTION FOUND"), "descrption was not obtained"

    # Being satisfied with the results he obtained for his simple query
    # he actually wants to try a larger one across several regions. He heard
    # that this might be an issue for the server in general, but that the
    # executioner takes care of addressing this issue by itself.

    # Since this is a lot of information Ira would particularly
    # like to drill down on the arguments that are allowed for his
    # favorite statistic BEVMK3.

    stat_args = stats.fields["BEVMK3"].get_arguments()
    assert len(stat_args) > 0
    assert "statistics" in stat_args

    # Although this is already really helpful Ira notices that
    # one of the arguments is an ENUM and he would like to know
    # the possible values that he can use for it.

    enum_vals = Query.get_info("BEVMK3Statistics").enum_values
    assert isinstance(enum_vals, dict), "Enum values should be dict"
    assert len(enum_vals) > 0, "Enums should have values"

    # Ira wants to add another statistic to his query.
    statistic1 = query.add_field("BEV001")
    statistic1.add_field("year")
    statistic1.add_args({"year": 2017})

    assert isinstance(statistic1, Field), "statistic is not a Field"

    # Then he wants to get metainfo on the field. get_info() prints to
    # stdout, so the output is captured; the context manager guarantees that
    # sys.stdout is restored afterwards, even if get_info() raises.

    stringio = io.StringIO()
    with redirect_stdout(stringio):
        statistic1.get_info()

    stats_info = re.sub(r"\n", "", stringio.getvalue())
    assert "OBJECT" in stats_info, "BEV001 should be an object"
def get_df_datenguide(region_codes, stat_codes):
    """Fetch query results for the given regions and statistics.

    Duplicate rows are dropped (with a fresh index) and every column whose
    name contains ``_source_`` is removed.

    :param region_codes: passed to ``Query.region`` as its first argument
    :param stat_codes: passed to ``Query.region`` as its second argument
    :return: the cleaned result frame
    """
    raw_frame = Query.region(region_codes, stat_codes).results()
    deduplicated = raw_frame.drop_duplicates(ignore_index=True)
    is_source_column = deduplicated.columns.str.contains("_source_")
    return deduplicated.loc[:, ~is_source_column]
Ejemplo n.º 5
0
 def lau_query(lau_level):
     """Return the all-regions query restricted to the given LAU level."""
     return Query.all_regions(lau=lau_level)
Ejemplo n.º 6
0
 def nuts_query(nuts_level):
     """Return the all-regions query restricted to the given NUTS level."""
     return Query.all_regions(nuts=nuts_level)
Ejemplo n.º 7
0
def download_all_regions() -> pd.DataFrame:
    """Download all regions and attach their level and parent region.

    Runs one query for all regions plus one query per level (NUTS 1-3 and
    LAU 1), then combines the results into a single frame indexed by region
    ``id`` with two additional columns, ``level`` and ``parent``.

    :raises RuntimeError: if the query for all regions returned ``None``
    :raises RuntimeError: if the query for any single level returned ``None``
    :return: all regions indexed by ``id`` with ``level`` and ``parent``
        columns
    :rtype: pd.DataFrame
    """
    def nuts_query(nuts_level):
        """Return the all-regions query restricted to a NUTS level."""
        return Query.all_regions(nuts=nuts_level)

    def lau_query(lau_level):
        """Return the all-regions query restricted to a LAU level."""
        return Query.all_regions(lau=lau_level)

    qb_all = Query.all_regions()

    qe = QueryExecutioner()
    print("start")
    all_regions = qe.run_query(qb_all)
    print("all")
    r_nuts1 = qe.run_query(nuts_query(1))
    print("nuts1")
    r_nuts2 = qe.run_query(nuts_query(2))
    print("nuts2")
    r_nuts3 = qe.run_query(nuts_query(3))
    print("nuts3")
    r_lau1 = qe.run_query(lau_query(1))
    print("lau")
    # Currently there is no distinction between different LAU levels on the
    # datenguide side, so only LAU 1 is queried.

    levels = {
        "nuts1": r_nuts1,
        "nuts2": r_nuts2,
        "nuts3": r_nuts3,
        "lau": r_lau1,
    }

    def _is_ancestor(region_id, candidate):
        """Tell whether ``candidate`` is a proper prefix of ``region_id``.

        :param region_id: id of the region whose ancestors are searched
        :type region_id: str
        :param candidate: id of the potential ancestor region
        :type candidate: str
        :return: ``True`` if ``region_id`` starts with ``candidate`` and the
            two ids differ
        :rtype: bool
        """
        return region_id.startswith(candidate) and candidate != region_id

    def parent(region_id, region_details):
        """Find the closest ancestor of ``region_id``.

        :param region_id: id of the region whose parent is wanted
        :type region_id: str
        :param region_details: frame indexed by region id with a ``level``
            column; the ancestor with the largest ``level`` is chosen
        :type region_details: pd.DataFrame
        :return: index label of the closest ancestor, or ``None`` if the
            region has no ancestor in ``region_details``
        :rtype: str or None
        """
        desc = region_details.assign(ansc=lambda df: df.index.map(
            lambda i: _is_ancestor(region_id, i))).query("ansc")
        # max_lev is referenced by name from inside the query string below.
        max_lev = desc.level.max()  # noqa: F841
        parent_frame = desc.query("level == @max_lev")
        if parent_frame.empty:
            return None
        return parent_frame.iloc[0, :].name

    if all_regions is None:
        raise RuntimeError("Was not able to download all regions")

    for k in levels:
        if levels[k] is None:
            raise RuntimeError(f"Was not able to download {k} regions")

    # Each query result is paginated; stack the pages into one frame.
    all_regions_df = pd.concat([
        pd.DataFrame(page["data"]["allRegions"]["regions"])
        for page in cast(List[ExecutionResults], all_regions)[0].query_results
    ]).set_index("id")

    # One frame per level, each tagged with its level name, stacked together.
    level_df = pd.concat(
        pd.concat([
            pd.DataFrame(page["data"]["allRegions"]["regions"]) for page in
            cast(List[ExecutionResults], levels[k])[0].query_results
        ]).assign(level=k) for k in levels)

    # For the parent lookup the "level" of a region is the length of its id,
    # so the closest ancestor is the longest id that is a proper prefix.
    all_rg_parents = all_regions_df.join(
        level_df.set_index("id").loc[:, "level"]).assign(
            parent=lambda df: df.index.map(
                partial(
                    parent,
                    region_details=all_regions_df.assign(level=lambda df: df.
                                                         index.map(len)),
                )))
    # NUTS 1 regions are all assigned the fixed parent id "DG".
    all_rg_parents.loc[all_rg_parents.level == "nuts1", "parent"] = "DG"

    return all_rg_parents