Example #1
0
def _aggregate_recursive(df, variable, recursive):
    """Aggregate the variable tree below `variable`, one level at a time (bottom-up)

    Parameters
    ----------
    df : object providing `filter()`
        presumably an IamDataFrame - TODO confirm against callers
    variable : str
        root of the variable tree; `variable` itself and everything matching
        `{variable}|*` are taken into account
    recursive : bool or str
        if equal to "skip-validate", aggregated values are not compared
        against data that already exists for intermediate variables

    Returns
    -------
    concatenation (via `pd.concat`) of the aggregated data of every level
    that was not already present in the data

    Raises
    ------
    ValueError
        if validation is active and `_compare` reports a conflict between
        existing and aggregated values
    """
    # restrict to the tree rooted at `variable` (the root is kept if it exists)
    _df = df.filter(variable=[variable, f"{variable}|*"])
    root_depth = find_depth(variable)
    deepest = max(find_depth(_df.variable))
    collected = []

    # walk from the deepest level up to the level of `variable`
    for depth in reversed(range(root_depth, deepest)):
        # `_df` grows during the loop, so its variables are re-read each iteration
        names = _df.variable
        one_below = compress(names, find_depth(names, level=depth + 1))
        parents = {reduce_hierarchy(name, -1) for name in one_below}

        # aggregate into a separate object to tell new data from existing data
        aggregated = _aggregate(_df, variable=parents)

        # look up data that already exists for the intermediate variables
        with adjust_log_level("pyam.core"):
            existing = _df.filter(variable=parents)._data
        shared_idx = aggregated.index.intersection(existing.index)
        new_idx = aggregated.index.difference(existing.index)

        # optionally verify that existing values agree with the aggregate
        if recursive != "skip-validate" and not shared_idx.empty:
            mismatch = _compare(existing, aggregated[shared_idx], "self", "aggregate")
            if not mismatch.empty:
                header = "Aggregated values are inconsistent with existing data:"
                raise ValueError(f"{header}\n{mismatch}")

        # only values not yet in the data are added and returned
        _df.append(aggregated[new_idx], inplace=True)
        collected.append(aggregated[new_idx])

    return pd.concat(collected)
Example #2
0
def _aggregate_region(df,
                      variable,
                      region,
                      subregions=None,
                      components=False,
                      method='sum',
                      weight=None):
    """Aggregate `variable` over `subregions` to the level of `region`

    Parameters
    ----------
    df : object providing `filter()` and `_all_other_regions()`
        presumably an IamDataFrame - TODO confirm against callers
    variable : str or list of str
        variable(s) to aggregate; must be a string if `components` is used
    region : str
        region for which the aggregate is computed
    subregions : list of str, optional
        regions to aggregate over; if empty or None, the result of
        `df._all_other_regions(region, variable)` is used
    components : bool or list of str, optional
        variables at the `region` level to add to the aggregate; if `True`,
        the variables below `variable` that are reported for `region`
        but not for the subregions are used
    method : str, optional
        passed on to `_group_and_agg` or `_agg_weight`
    weight : str, optional
        variable used as weight; cannot be combined with `components`

    Returns
    -------
    the aggregated data, or None if there are no subregions

    Raises
    ------
    ValueError
        if `components` is used with a non-string `variable` or with `weight`
    """
    if not (isstr(variable) or components is False):
        raise ValueError(
            'aggregating by list of variables with components '
            'is not supported'
        )

    if not (weight is None or components is False):
        raise ValueError(
            'using weights and components in one operation not supported'
        )

    # fall back to every region other than `region`
    if not subregions:
        subregions = df._all_other_regions(region, variable)

    if len(subregions) == 0:
        template = 'cannot aggregate variable `{}` to `{}` because it does not' \
                   ' exist in any subregion'
        logger.info(template.format(variable, region))
        return

    # aggregate the variable over the subregions, optionally weighted
    subregion_df = df.filter(region=subregions)
    var_rows = subregion_df._apply_filters(variable=variable)
    if weight is not None:
        weight_rows = subregion_df._apply_filters(variable=weight)
        _data = _agg_weight(subregion_df.data[var_rows],
                            subregion_df.data[weight_rows], method)
    else:
        _data = _group_and_agg(subregion_df.data[var_rows], 'region',
                               method=method)

    # nothing more to do unless components were requested
    if components is False:
        return _data

    with adjust_log_level(logger):
        region_df = df.filter(region=region)

    # `True` means auto-detection: variables below `variable` that exist
    # at the `region` level but not in the subregions
    if components is True:
        in_region = region_df._variable_components(variable, level=None)
        in_subregions = subregion_df._variable_components(variable, level=None)
        components = set(in_region).difference(in_subregions)

    if len(components):
        # relabel the components as `variable` and add them to the aggregate
        comp_rows = region_df._apply_filters(variable=components)
        comp_data = region_df.data[comp_rows].copy()
        comp_data['variable'] = variable
        _data = _data.add(_group_and_agg(comp_data, 'region'), fill_value=0)

    return _data
Example #3
0
def _aggregate_region(
    df,
    variable,
    region,
    subregions=None,
    components=False,
    method="sum",
    weight=None,
    drop_negative_weights=True,
):
    """Aggregate `variable` over `subregions` to the level of `region`

    Parameters
    ----------
    df : object providing `filter()` and `_all_other_regions()`
        presumably an IamDataFrame - TODO confirm against callers
    variable : str or list of str
        variable(s) to aggregate; must be a string if `components` is used
    region : str
        region for which the aggregate is computed
    subregions : list of str, optional
        regions to aggregate over; if empty or None, the result of
        `df._all_other_regions(region, variable)` is used
    components : bool or list of str, optional
        variables at the `region` level to add to the aggregate; if `True`,
        the variables below `variable` that are reported for `region`
        but not for the subregions are used
    method : str, optional
        passed on to `_group_and_agg` or `_agg_weight`
    weight : str, optional
        variable used as weight; cannot be combined with `components`
    drop_negative_weights : bool, optional
        passed on to `_agg_weight`; `False` is only accepted together
        with `weight`

    Returns
    -------
    the aggregated data, or None if there are no subregions

    Raises
    ------
    ValueError
        if `components` is used with a non-string `variable` or with `weight`,
        or if `drop_negative_weights` is `False` while `weight` is None
    """
    if not (isstr(variable) or components is False):
        msg = "Aggregating by list of variables with components is not supported!"
        raise ValueError(msg)

    if not (weight is None or components is False):
        msg = "Using weights and components in one operation not supported!"
        raise ValueError(msg)

    # fall back to every region other than `region`
    if not subregions:
        subregions = df._all_other_regions(region, variable)

    if len(subregions) == 0:
        msg = (
            f"Cannot aggregate variable '{variable}' to '{region}' "
            "because it does not exist in any subregion!"
        )
        logger.info(msg)
        return

    # aggregate the variable over the subregions, optionally weighted
    subregion_df = df.filter(region=subregions)
    var_rows = subregion_df._apply_filters(variable=variable)
    if weight is not None:
        weight_rows = subregion_df._apply_filters(variable=weight)
        _data = _agg_weight(
            subregion_df._data[var_rows],
            subregion_df._data[weight_rows],
            method,
            drop_negative_weights,
        )
    else:
        # the flag has no meaning without weights, so reject an explicit `False`
        if drop_negative_weights is False:
            msg = "Dropping negative weights can only be used with `weights`!"
            raise ValueError(msg)

        _data = _group_and_agg(subregion_df._data[var_rows], "region", method=method)

    # nothing more to do unless components were requested
    if not components:
        return _data

    with adjust_log_level(logger):
        region_df = df.filter(region=region)

    # `True` means auto-detection: variables below `variable` that exist
    # at the `region` level but not in the subregions
    if components is True:
        in_region = region_df._variable_components(variable, level=None)
        in_subregions = subregion_df._variable_components(variable, level=None)
        components = set(in_region).difference(in_subregions)

    if len(components):
        # relabel the components as `variable` and add them to the aggregate
        comp_rows = region_df._apply_filters(variable=components)
        comp_data = region_df._data[comp_rows]
        rename = dict.fromkeys(components, variable)
        comp_data.index = replace_index_values(comp_data.index, "variable", rename)
        _data = _data.add(_group_and_agg(comp_data, "region"), fill_value=0)

    return _data