def _aggregate_recursive(df, variable, recursive):
    """Aggregate the components of `variable` level by level, bottom-up

    Parameters
    ----------
    df : object
        Data container; it is filtered to `variable` and `variable|*`, and the
        filtered selection receives the new aggregates of each level so that
        they are available as components for the next level up.
    variable : str
        Root of the variable tree to aggregate.
    recursive : object
        If equal to "skip-validate", aggregated values are not compared
        against data that already exists for intermediate variables.

    Returns
    -------
    The concatenation (via `pd.concat`) of the aggregated data that was not
    already present in the selection, collected over all levels.

    Raises
    ------
    ValueError
        If validation is active and `_compare` reports differences between
        the aggregated values and the existing data.
    """
    # restrict to `variable` and everything below it; `variable` itself is
    # kept in the selection if it exists
    subset = df.filter(variable=[variable, f"{variable}|*"])
    collected = []
    validate = recursive != "skip-validate"

    # walk from the deepest level up to the level of `variable`
    root_depth = find_depth(variable)
    deepest = max(find_depth(subset.variable))
    for level in range(deepest - 1, root_depth - 1, -1):
        # NOTE(review): presumably selects the variables at depth `level + 1`
        # -- confirm against `find_depth(..., level=...)`
        children = compress(
            subset.variable, find_depth(subset.variable, level=level + 1)
        )
        parents = {reduce_hierarchy(name, -1) for name in children}

        # aggregate into a temporary object to tell new data from existing data
        aggregated = _aggregate(subset, variable=parents)

        # split the aggregated index into rows that already exist and new rows
        existing = subset.filter(variable=parents)._data
        shared = aggregated.index.intersection(existing.index)
        fresh = aggregated.index.difference(existing.index)

        # optionally verify that existing data agrees with the aggregates
        if validate and not shared.empty:
            conflict = _compare(existing, aggregated[shared], "self", "aggregate")
            if not conflict.empty:
                raise ValueError(
                    f"Aggregated values are inconsistent with existing data:\n{conflict}"
                )

        # only rows that are not already in the data are added and returned
        subset.append(aggregated[fresh], inplace=True)
        collected.append(aggregated[fresh])

    return pd.concat(collected)
def _aggregate_recursive(df, variable, method=np.sum):
    """Recursive aggregation along the variable tree

    Parameters
    ----------
    df : object
        Data container; this function uses `df.copy()`, `df.data.variable`
        and the `aggregate()` / `append()` methods of the resulting objects.
    variable : str
        Root of the variable tree to aggregate.
    method : callable, optional
        Kept for backward compatibility of the signature; this function does
        not use it.

    Returns
    -------
    The `data` attribute of the object collecting all aggregation results.

    Raises
    ------
    ValueError
        If there is no nested variable below `variable` to aggregate.
    """
    _df = df.copy()

    # the variables of `df` do not change here, so compute their depths once
    variables = df.data.variable
    depths = find_depth(variables)

    # parents of all nested variables (depth >= 1), i.e. the aggregation targets
    parents = {
        reduce_hierarchy(name, -1)
        for name, depth in zip(variables, depths)
        if depth >= 1
    }

    # keep only targets inside the tree of `variable`; matching on the
    # "<variable>|" prefix avoids picking up unrelated variables whose name
    # merely starts with the same characters
    prefix = variable + "|"
    # reverse lexicographic order puts every child before its parent (bottom-up)
    sub_variables = sorted(
        (name for name in parents if name == variable or name.startswith(prefix)),
        reverse=True,
    )
    if not sub_variables:
        raise ValueError(f"No components found to aggregate for `{variable}`")

    # iterate over subcategories (bottom-up) and perform aggregation
    _df_aggregated = None
    for entry in sub_variables:
        # add the aggregate to the working copy so the next level up can use it
        _df.aggregate(variable=entry, append=True)
        _df_temp = _df.aggregate(variable=entry, append=False)

        if _df_aggregated is None:
            _df_aggregated = _df_temp.copy()
        else:
            _df_aggregated.append(_df_temp, inplace=True)

    return _df_aggregated.data
def test_reduce_hierarchy_neg2():
    """A depth of -2 applied to 'foo|bar|baz' is expected to give 'foo'."""
    reduced = utils.reduce_hierarchy('foo|bar|baz', -2)
    assert reduced == 'foo'
def test_reduce_hierarchy_1():
    """A depth of 1 applied to 'foo|bar|baz' is expected to give 'foo|bar'."""
    reduced = utils.reduce_hierarchy('foo|bar|baz', 1)
    assert reduced == 'foo|bar'
def test_reduce_hierarchy_0():
    """A depth of 0 applied to 'foo|bar|baz' is expected to give 'foo'."""
    reduced = utils.reduce_hierarchy('foo|bar|baz', 0)
    assert reduced == 'foo'
def test_reduce_hierarchy_neg2():
    """A depth of -2 applied to 'foo|bar|baz' is expected to give 'foo'."""
    reduced = utils.reduce_hierarchy("foo|bar|baz", -2)
    assert reduced == "foo"
def test_reduce_hierarchy_1():
    """A depth of 1 applied to 'foo|bar|baz' is expected to give 'foo|bar'."""
    reduced = utils.reduce_hierarchy("foo|bar|baz", 1)
    assert reduced == "foo|bar"
def test_reduce_hierarchy_0():
    """A depth of 0 applied to 'foo|bar|baz' is expected to give 'foo'."""
    reduced = utils.reduce_hierarchy("foo|bar|baz", 0)
    assert reduced == "foo"