Example #1
import pandas as pd

# Note: `find_depth`, `reduce_hierarchy`, `compress`, `_aggregate`, and `_compare`
# used below are pyam-internal helpers assumed to be available in this module.
def _aggregate_recursive(df, variable, recursive):
    """Recursive aggregation along the variable tree"""

    # downselect to components of `variable`, initialize list for aggregated (new) data
    # keep variable at highest level if it exists
    _df = df.filter(variable=[variable, f"{variable}|*"])
    data_list = []

    # iterate over variables (bottom-up) and aggregate all components up to `variable`
    for d in reversed(range(find_depth(variable), max(find_depth(_df.variable)))):
        components = compress(_df.variable, find_depth(_df.variable, level=d + 1))
        var_list = set([reduce_hierarchy(v, -1) for v in components])

        # a temporary dataframe allows to distinguish between full data and new data
        _data_agg = _aggregate(_df, variable=var_list)

        # check if data for intermediate variables already exists
        _data_self = _df.filter(variable=var_list)._data
        _overlap = _data_agg.index.intersection(_data_self.index)
        _new = _data_agg.index.difference(_data_self.index)

        # assert that aggregated values are consistent with existing data (optional)
        if recursive != "skip-validate" and not _overlap.empty:
            conflict = _compare(_data_self, _data_agg[_overlap], "self", "aggregate")
            if not conflict.empty:
                msg = "Aggregated values are inconsistent with existing data:"
                raise ValueError(f"{msg}\n{conflict}")

        # append aggregated values that are not already in data
        _df.append(_data_agg[_new], inplace=True)
        data_list.append(_data_agg[_new])

    return pd.concat(data_list)
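
This helper is an excerpt from pyam's internal aggregation code. In recent pyam releases the same functionality is reached through the public IamDataFrame.aggregate() method via its recursive keyword (with recursive="skip-validate" disabling the consistency check above). The snippet below is a minimal usage sketch under that assumption; the model, scenario, and variable names are invented for illustration:

import pandas as pd
import pyam

# minimal IAMC-format data with a three-level variable hierarchy
df = pyam.IamDataFrame(
    pd.DataFrame(
        [
            ["model_a", "scen_a", "World", "Primary Energy|Fossil|Coal", "EJ/yr", 3.0, 4.0],
            ["model_a", "scen_a", "World", "Primary Energy|Fossil|Gas", "EJ/yr", 2.0, 3.0],
            ["model_a", "scen_a", "World", "Primary Energy|Wind", "EJ/yr", 1.0, 2.0],
        ],
        columns=pyam.IAMC_IDX + [2005, 2010],
    )
)

# aggregate bottom-up over all sub-categories of "Primary Energy"
agg = df.aggregate("Primary Energy", recursive=True)
print(agg.timeseries())

Both the intermediate total "Primary Energy|Fossil" and the top-level "Primary Energy" are computed bottom-up, mirroring the loop over hierarchy depths in the function above.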
Example #2
import numpy as np
import pandas as pd

# Note: `find_depth` and `reduce_hierarchy` used below are pyam-internal helpers
# assumed to be available in this module.
def _aggregate_recursive(df, variable, method=np.sum):
    """Recursive aggregation along the variable tree"""
    _df_aggregated = None
    _df = df.copy()

    # iterate over variables to find all subcategories to be aggregated
    sub_variables = []
    for d in reversed(range(1, max(find_depth(df.data.variable)) + 1)):
        depth = find_depth(df.data.variable)
        var_list = df.data.variable[[i == d for i in depth]].unique()
        vars_up = pd.Series(
            [reduce_hierarchy(i, -1) for i in var_list]
        ).unique()

        # keep the parent variables at this depth if any of them starts with `variable`
        if any(v.startswith(variable) for v in vars_up):
            sub_variables.extend(vars_up)

    sub_variables = reversed(sorted(set(sub_variables)))

    # iterate over subcategories (bottom-up) and perform aggregation
    for entry in sub_variables:
        # append the aggregated variable to the working dataframe, then compute it
        # again separately to collect only the newly aggregated data
        _df.aggregate(variable=entry, append=True)
        _df_temp = _df.aggregate(variable=entry, append=False)

        if _df_aggregated is None:
            _df_aggregated = _df_temp.copy()
        else:
            _df_aggregated.append(_df_temp, inplace=True)

    return _df_aggregated.data
Example #3
def test_reduce_hierarchy_neg2():
    assert utils.reduce_hierarchy('foo|bar|baz', -2) == 'foo'
Example #4
def test_reduce_hierarchy_1():
    assert utils.reduce_hierarchy('foo|bar|baz', 1) == 'foo|bar'
Example #5
def test_reduce_hierarchy_0():
    assert utils.reduce_hierarchy('foo|bar|baz', 0) == 'foo'