Example no. 1
0
def convert_unit(df,
                 current,
                 to,
                 factor=None,
                 registry=None,
                 context=None,
                 inplace=False):
    """Internal implementation of unit conversion with explicit kwargs.

    Parameters
    ----------
    df : IamDataFrame-like
        Object holding timeseries data in ``df._data`` (a pandas Series
        whose index has a 'unit' level) — presumably a pyam IamDataFrame;
        confirm against caller.
    current : str
        Unit to convert *from*; rows with other units are left untouched.
    to : str
        Unit to convert *to*.
    factor : number, optional
        Explicit conversion factor; if given, pint is bypassed entirely.
    registry : pint.UnitRegistry, optional
        Registry for the conversion; defaults to ``iam_units.registry``.
    context : str or pint.Context, optional
        Conversion context (e.g. a GWP metric for emissions species).
    inplace : bool, optional
        If True, modify `df` directly and return None.

    Raises
    ------
    TypeError
        If `registry` does not provide a ``Quantity`` attribute
        (i.e. is not a ``pint.UnitRegistry``).
    """
    # Operate on a copy unless the caller asked for in-place modification
    ret = df.copy() if not inplace else df

    # Mask for rows having *current* units to be converted, args for replace
    try:
        where = ret._data.index.get_loc_level(current, 'unit')[0]
    except KeyError:
        # *current* not present in the 'unit' level: select no rows
        where = [False] * len(ret)

    # Arguments for rewriting the 'unit' index level after conversion
    index_args = [ret._data, 'unit', {current: to}]

    # NOTE(review): truthiness check means `factor=0` falls through to the
    # pint code path as if no factor was given — presumably intended, confirm
    if factor:
        # Short code path: use an explicit conversion factor, don't use pint
        ret._data[where] *= factor
        ret._data.index = replace_index_values(*index_args)
        return None if inplace else ret

    # Convert using a pint.UnitRegistry; default the one from iam_units
    registry = registry or iam_units.registry

    # Make versions without -equiv
    _current, _to = [i.replace('-equiv', '') for i in [current, to]]
    # Pair of (magnitude, unit)
    qty = [ret.data.loc[where, 'value'].values, _current]

    try:
        # Create a vector pint.Quantity
        qty = registry.Quantity(*qty)
    except pint.UndefinedUnitError:
        # *qty* might include a GHG species; try GWP conversion
        # (here *qty* is still the [values, unit] list, not a Quantity)
        result, _to = convert_gwp(context, qty, _to)
    except AttributeError:
        # .Quantity() did not exist
        raise TypeError(f'{registry} must be `pint.UnitRegistry`') from None
    else:
        # Ordinary conversion, using an empty Context if none was provided
        result = qty.to(_to, context or pint.Context())

    # Copy values from the result Quantity and assign units
    ret._data[where] = result.magnitude
    ret._data.index = replace_index_values(*index_args)

    return None if inplace else ret
Example no. 2
0
def test_replace_index_level(test_pd_df, test_df_index, exp_scen, mapping, rows):
    """Assert that replace_index_value works as expected"""

    # Build the expected scenario column: when `rows` restricts replacement,
    # the first row keeps its original scenario name
    if rows is None:
        scenarios = exp_scen
    else:
        scenarios = ["scen_a"] + exp_scen[1:]
    test_pd_df["scenario"] = scenarios
    exp = test_pd_df.set_index(IAMC_IDX)

    # Apply the replacement on the indexed frame and compare
    test_df_index.index = replace_index_values(test_df_index, "scenario", mapping, rows)
    pdt.assert_frame_equal(exp, test_df_index)
Example no. 3
0
def test_replace_index_level(test_pd_df, test_df_index, exp_scen, mapping):
    """Assert that replace_index_value works as expected"""
    # Expected frame: scenario column replaced by `exp_scen`, IAMC index set
    expected = test_pd_df.copy()
    expected["scenario"] = exp_scen
    expected = expected.set_index(IAMC_IDX)

    # Observed frame: apply the mapping to the 'scenario' index level
    observed = test_df_index.copy()
    observed.index = replace_index_values(observed, "scenario", mapping)

    pdt.assert_frame_equal(expected, observed)
Example no. 4
0
def test_replace_index_level(test_pd_df, test_df_index):
    """Assert that replace_index_value works as expected"""
    # Expected frame: 'scen_a' renamed to 'scen_c' in the scenario column
    expected = test_pd_df.copy()
    expected['scenario'] = ['scen_c', 'scen_c', 'scen_b']
    expected = expected.set_index(IAMC_IDX)

    # Observed frame: apply the single-entry mapping to the index level
    observed = test_df_index.copy()
    observed.index = replace_index_values(observed, 'scenario', {'scen_a': 'scen_c'})

    pdt.assert_frame_equal(expected, observed)
Example no. 5
0
def _aggregate(df, variable, components=None, method=np.sum):
    """Internal implementation of the `aggregate` function"""

    if components is not None:
        # `components` must be list-like, and a dict is explicitly rejected
        if isinstance(components, dict) or not islistable(components):
            raise ValueError(
                f"Value for `components` must be a list, found: {components}"
            )

        # a manual component list is only supported for a single variable
        if islistable(variable):
            raise NotImplementedError(
                "Aggregating by list of variables does not support `components`."
            )

    msg = "Cannot aggregate variable '{}' because it has no components!"
    mapping = {}

    if isstr(variable):
        # single variable: default components to all variables one level below
        _comps = components or df._variable_components(variable)

        if not len(_comps):
            logger.info(msg.format(variable))
            return

        mapping.update({c: variable for c in _comps})

    else:
        # list of variables: use all variables one level below each entry
        variables = variable if islistable(variable) else [variable]
        for v in variables:
            v_comps = df._variable_components(v)
            if not len(v_comps):
                logger.info(msg.format(v))
                continue

            mapping.update({c: v for c in v_comps})

    # rename all components to `variable` and aggregate
    _df = df._data[df._apply_filters(variable=mapping.keys())]
    _df.index = replace_index_values(_df, "variable", mapping)
    return _group_and_agg(_df, [], method)
Example no. 6
0
def _aggregate_region(
    df,
    variable,
    region,
    subregions=None,
    components=False,
    method="sum",
    weight=None,
    drop_negative_weights=True,
):
    """Internal implementation for aggregating data over subregions.

    Parameters
    ----------
    df : IamDataFrame-like
        Object providing ``filter``, ``_apply_filters``, ``_data`` and
        ``_all_other_regions`` — presumably a pyam IamDataFrame; confirm.
    variable : str or list of str
        Variable(s) to aggregate; a list is incompatible with `components`.
    region : str
        Target region to aggregate into.
    subregions : list of str, optional
        Regions to aggregate over; defaults to all regions other than `region`.
    components : bool or list of str, optional
        If truthy, add components defined directly at the `region` level;
        `True` auto-detects them, a list names them explicitly.
    method : str or callable, optional
        Aggregation method passed to the grouping helper.
    weight : str, optional
        Variable to use for weighted aggregation; incompatible with
        `components`.
    drop_negative_weights : bool, optional
        Only meaningful with `weight`; raising otherwise when set to False.

    Returns
    -------
    pd.Series or None
        Aggregated data, or None if no subregion has the variable.

    Raises
    ------
    ValueError
        On unsupported argument combinations (see checks below).
    """
    if not isstr(variable) and components is not False:
        raise ValueError(
            "Aggregating by list of variables with components is not supported!"
        )

    if weight is not None and components is not False:
        raise ValueError("Using weights and components in one operation not supported!")

    # default subregions to all regions other than `region`
    subregions = subregions or df._all_other_regions(region, variable)

    if not len(subregions):
        logger.info(
            f"Cannot aggregate variable '{variable}' to '{region}' "
            "because it does not exist in any subregion!"
        )
        return

    # compute aggregate over all subregions
    subregion_df = df.filter(region=subregions)
    rows = subregion_df._apply_filters(variable=variable)
    if weight is None:

        # `drop_negative_weights=False` is only valid together with `weight`
        if drop_negative_weights is False:
            raise ValueError(
                "Dropping negative weights can only be used with `weights`!"
            )

        # plain (unweighted) aggregation, collapsing the 'region' level
        _data = _group_and_agg(subregion_df._data[rows], "region", method=method)
    else:
        # weighted aggregation: weights come from the `weight` variable rows
        weight_rows = subregion_df._apply_filters(variable=weight)
        _data = _agg_weight(
            subregion_df._data[rows],
            subregion_df._data[weight_rows],
            method,
            drop_negative_weights,
        )

    # if not `components=False`, add components at the `region` level
    if components:
        # suppress filter warnings while selecting the target region
        with adjust_log_level(logger):
            region_df = df.filter(region=region)

        # if `True`, auto-detect `components` at the `region` level,
        # defaults to variables below `variable` only present in `region`
        if components is True:
            level = dict(level=None)
            r_comps = region_df._variable_components(variable, **level)
            sr_comps = subregion_df._variable_components(variable, **level)
            components = set(r_comps).difference(sr_comps)

        if len(components):
            # rename all components to `variable` and aggregate
            rows = region_df._apply_filters(variable=components)
            _df = region_df._data[rows]
            mapping = {c: variable for c in components}
            # NOTE(review): here the pd.Index is passed, whereas other call
            # sites pass the Series/frame itself — presumably
            # replace_index_values accepts either; confirm against its def
            _df.index = replace_index_values(_df.index, "variable", mapping)
            # add component aggregate on top of the subregion aggregate
            _data = _data.add(_group_and_agg(_df, "region"), fill_value=0)

    return _data
Example no. 7
0
def test_replace_index_level_raises(test_df_index):
    """Assert that replace_index_value raises with non-existing level"""
    # 'foo' is not a level of the index, so a KeyError is expected
    mapping = {"scen_a": "scen_c"}
    with pytest.raises(KeyError):
        replace_index_values(test_df_index, "foo", mapping)
Example no. 8
0
def _op_data(df,
             name,
             method,
             axis,
             fillna=None,
             args=(),
             ignore_units=False,
             **kwds):
    """Internal implementation of numerical operations on timeseries.

    Parameters
    ----------
    df : IamDataFrame-like
        Object holding timeseries data in ``df._data`` (a pd.Series with a
        MultiIndex) — presumably a pyam IamDataFrame; confirm against caller.
    name : str
        Index value appended on `axis` for the result.
    method : str or callable
        Key into ``KNOWN_OPS`` or a callable applied to the resolved args.
    axis : str
        Index level along which the operation is applied.
    fillna : scalar, optional
        Value used to fill gaps after outer-merging all data-based operands.
    args : tuple, optional
        Positional operands; values matching index entries on `axis` are
        replaced by the corresponding slices of ``df._data``.
    ignore_units : bool or str, optional
        If False, carry units through pint; otherwise drop units and label
        the result with `ignore_units` (or "unknown" if True).
    **kwds
        Keyword operands, resolved like `args`.

    Returns
    -------
    pd.Series
        Resulting values with the `name` entry appended on `axis`.

    Raises
    ------
    ValueError
        On unknown `axis`/`method`, or if `method` does not return a Series.
    """

    if axis not in df._data.index.names:
        raise ValueError(f"Unknown axis: {axis}")

    # resolve `method` from the registry of known operations, or keep callable
    if method in KNOWN_OPS:
        method = KNOWN_OPS[method]
    elif callable(method):
        pass
    else:
        raise ValueError(f"Unknown method: {method}")

    # all index levels other than `axis` (used to align operand data)
    cols = df._data.index.names.difference([axis])

    # replace args and kwds with values of `df._data` if applicable;
    # _data_args and _data_kwds track if an argument was replaced by
    # `df._data` values, _units_args/_unit_kwds collect the units seen
    n = len(args)
    _args, _data_args, _units_args = [None] * n, [False] * n, [None] * n
    for i, value in enumerate(args):
        _args[i], _units_args[i], _data_args[i] = _get_values(
            df, axis, value, cols, f"_arg{i}")

    _data_kwds, _unit_kwds = {}, {}
    for i, (key, value) in enumerate(kwds.items()):
        kwds[key], _unit_kwds[key], _data_kwds[key] = _get_values(
            df, axis, value, cols, key)

    # fast-pass on units: override pint for some methods if all kwds have the same unit
    # NOTE(review): this assumes kwds contains both "a" and "b" whenever
    # `method` is add/subtract/divide — presumably guaranteed by the caller;
    # confirm, otherwise this raises KeyError
    if (method in [add, subtract, divide] and ignore_units is False
            and fillna is None and len(_unit_kwds["a"]) == 1
            and len(_unit_kwds["b"]) == 1 and registry.Unit(
                _unit_kwds["a"][0]) == registry.Unit(_unit_kwds["b"][0])):
        # activate ignore-units feature
        ignore_units = _unit_kwds["a"][0] if method in [add, subtract] else ""
        # downcast `pint.Quantity` to numerical value
        kwds["a"], kwds["b"] = _to_value(kwds["a"]), _to_value(kwds["b"])

    # cast args and kwds to pd.Series of pint.Quantity
    if ignore_units is False:
        for i, is_data in enumerate(_data_args):
            _args[i] = _to_quantity(_args[i]) if is_data else _args[i]
        for key, value in kwds.items():
            kwds[key] = _to_quantity(value) if _data_kwds[key] else value
    # else remove units from pd.Series
    else:
        for i, is_data in enumerate(_data_args):
            _args[i] = _args[i].reset_index("unit",
                                            drop=True) if is_data else _args[i]
        for key, value in kwds.items():
            kwds[key] = (value.reset_index("unit", drop=True)
                         if _data_kwds[key] else value)

    # merge all args and kwds that are based on `df._data` to apply fillna
    if fillna:
        _data_cols = [
            _args[i] for i, is_data in enumerate(_data_args) if is_data
        ]
        _data_cols += [
            kwds[key] for key, is_data in _data_kwds.items() if is_data
        ]
        # NOTE(review): pd.merge takes exactly two frames, so this expects
        # exactly two data-based operands — confirm with callers
        _data = pd.merge(*_data_cols,
                         how="outer",
                         left_index=True,
                         right_index=True)

        _data.fillna(fillna, inplace=True)

        # write the merged-and-filled columns back to the operand slots
        for i, is_data in enumerate(_data_args):
            if is_data:
                _args[i] = _data[f"_arg{i}"]
        for key, is_data in _data_kwds.items():
            if is_data:
                kwds[key] = _data[key]

    # apply method and check that returned object is valid
    result = method(*_args, **kwds)
    if not isinstance(result, pd.Series):
        msg = f"Value returned by `{method.__name__}` cannot be cast to an IamDataFrame"
        raise ValueError(f"{msg}: {result}")

    # separate pint quantities into numerical value and unit (as index)
    if ignore_units is False:
        _value = pd.DataFrame(
            [[i.magnitude, "{:~}".format(i.units)] for i in result.values],
            columns=["value", "unit"],
            index=result.index,
        ).set_index("unit", append=True)
        # normalize pint's "dimensionless" to an empty-string unit
        _value.index = replace_index_values(_value, "unit",
                                            {"dimensionless": ""})

    # otherwise, set unit (as index) to "unknown" or the value given by "ignore_units"
    else:
        index = append_index_level(
            result.index,
            codes=0,
            level="unknown" if ignore_units is True else ignore_units,
            name="unit",
        )
        _value = pd.Series(result.values, index=index, name="value")

    # append the `name` to the index on the `axis`
    _value.index = append_index_level(_value.index,
                                      codes=0,
                                      level=name,
                                      name=axis)
    return _value
Example no. 9
0
def test_replace_index_level_raises(test_df_index):
    """Assert that replace_index_value raises with non-existing level"""
    # 'foo' is not a level of the index, so a KeyError is expected
    mapping = {'scen_a': 'scen_c'}
    with pytest.raises(KeyError):
        replace_index_values(test_df_index, 'foo', mapping)