예제 #1
0
def read_worldbank(model="World Bank", scenario="WDI", **kwargs):
    """Download World Bank indicator data and return it as an IamDataFrame

    Thin wrapper around the class :class:`pandas_datareader.wb.WorldBankReader`
    and the function :func:`pandas_datareader.wb.download`. To retrieve or
    search the list of indicators (and their id's), countries, etc., import
    the underlying module directly:

    .. code-block:: python

        from pandas_datareader import wb

    Parameters
    ----------
    model : str, optional
        Name used as `model` of the returned timeseries data.
    scenario : str, optional
        Name used as `scenario` of the returned timeseries data.
    kwargs
        Forwarded unchanged to :func:`pandas_datareader.wb.download`.

    Returns
    -------
    :class:`IamDataFrame`

    Raises
    ------
    ImportError
        If the package `pandas-datareader` is not available.

    Notes
    -----
    :func:`pandas_datareader.wb.download` accepts an `indicator` argument
    given as a string or a list of strings. When the `indicator` passed to
    :func:`read_worldbank` is instead a dictionary mapping World Bank ids to
    strings, that dictionary is also used to rename the variables of the
    returned IamDataFrame.

    :func:`pandas_datareader.wb.download` does not return a unit, although it
    can be collected for some indicators via
    :func:`pandas_datareader.wb.get_indicators`. The current implementation
    sets the unit to `n/a` for all data; this can be enhanced later (if there
    is interest from users).
    """
    if not HAS_DATAREADER:  # pragma: no cover
        raise ImportError("Required package `pandas-datareader` not found!")

    raw = wb.download(**kwargs)

    # every downloaded column is treated as a value column of the timeseries
    # TODO use wb.get_indicators to retrieve correct units (where available)
    result = IamDataFrame(
        raw.reset_index(),
        model=model,
        scenario=scenario,
        value=raw.columns,
        unit="n/a",
        region="country",
    )

    # a mapping given as `indicator` doubles as the variable-renaming table
    indicator = kwargs.get("indicator")
    if isinstance(indicator, dict):
        result.rename(variable=indicator, inplace=True)

    return result
예제 #2
0
def test_init_from_iamdf(test_df_year):
    """Check how an IamDataFrame created from another one relates to it"""
    # initializing from an existing IamDataFrame instance is supported
    recast = IamDataFrame(test_df_year)

    # an in-place rename on the new object is visible on the original object
    recast.rename(scenario={'scen_a': 'scen_foo'}, inplace=True)
    assert test_df_year.scenario == ['scen_b', 'scen_foo']

    # a rename that returns a new object leaves the original object as is
    renamed = recast.rename(scenario={'scen_foo': 'scen_bar'})
    assert renamed.scenario == ['scen_b', 'scen_bar']
    assert test_df_year.scenario == ['scen_b', 'scen_foo']
예제 #3
0
def _validate(df: pyam.IamDataFrame) -> pyam.IamDataFrame:
    """Check regions, variables and the subannual time resolution of `df`

    Region synonyms (the "abbr" and "iso3" attributes of the region
    definitions) are renamed in place on `df` to the region names before
    regions and variables are validated. Data provided with a "time" column
    is re-cast to the categorical "subannual" format.

    Parameters
    ----------
    df : pyam.IamDataFrame
        Scenario data to be validated; its regions are renamed in place.

    Returns
    -------
    pyam.IamDataFrame
        `df`, or the result of `df.swap_time_for_year()` if the data was
        provided with a "time" column.

    Raises
    ------
    ValueError
        If a "subannual" item that is not part of the definitions cannot be
        parsed as a datetime, has no timezone, or has a timezone matching
        neither `EXP_TZ` nor `EXP_TIME_OFFSET`.
    """
    # load definitions (including 'subannual' if included in the scenario data)
    dimensions = ["region", "variable"]
    if "subannual" in df.dimensions or df.time_col == "time":
        dimensions.append("subannual")

    definition = DataStructureDefinition(
        here / "definitions", dimensions=dimensions
    )

    # apply a renaming from region-synonyms to region-names
    synonym_to_region = {
        attributes[key]: region
        for region, attributes in definition.region.items()
        for key in ("abbr", "iso3")
        if key in attributes
    }
    df.rename(region=synonym_to_region, inplace=True)

    # check variables and regions
    definition.validate(df, dimensions=["region", "variable"])

    # convert to subannual format if data provided in datetime format
    if df.time_col == "time":
        logger.info(
            'Re-casting from "time" column to categorical "subannual" format'
        )
        df = df.swap_time_for_year(subannual=OE_SUBANNUAL_FORMAT)

    # nothing else to check for data without a subannual resolution
    if "subannual" not in df.dimensions:
        return df

    # items that are not defined timeslices must be datetime-like and carry
    # the expected timezone
    for item in df.subannual:
        if item in definition.subannual:
            continue

        # a year is prepended so that the item parses as a full timestamp
        # (2020 is a leap year, so "02-29" is accepted)
        stamp = f"2020-{item}"
        try:
            parsed = datetime.strptime(stamp, "%Y-%m-%d %H:%M%z")
        except ValueError:
            # parse again without timezone to tell apart the two error cases
            try:
                datetime.strptime(stamp, "%Y-%m-%d %H:%M")
            except ValueError:
                raise ValueError(f"Invalid subannual timeslice: {item}")

            raise ValueError(f"Missing timezone: {item}")

        # casting to datetime with timezone was successful
        if parsed.tzname() != EXP_TZ and parsed.utcoffset() != EXP_TIME_OFFSET:
            raise ValueError(f"Invalid timezone: {item}")

    return df
예제 #4
0
def test_rename():
    """Renaming two variables to the same name yields one merged row"""
    columns = ['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]

    # 'test_1' and 'test_3' only differ by their variable name
    initial = pd.DataFrame(
        [
            ['model', 'scen', 'SST', 'test_1', 'unit', 1, 5],
            ['model', 'scen', 'SDN', 'test_2', 'unit', 2, 6],
            ['model', 'scen', 'SST', 'test_3', 'unit', 3, 7],
        ],
        columns=columns,
    )

    # the merged 'test' row is expected to hold the sums 1 + 3 and 5 + 7
    expected = pd.DataFrame(
        [
            ['model', 'scen', 'SST', 'test', 'unit', 4, 12],
            ['model', 'scen', 'SDN', 'test_2', 'unit', 2, 6],
        ],
        columns=columns,
    )

    mapping = {'variable': {'test_1': 'test', 'test_3': 'test'}}
    obs = IamDataFrame(initial).rename(mapping).data.reset_index(drop=True)

    exp = IamDataFrame(expected).data
    exp = exp.sort_values(by='region').reset_index(drop=True)

    pd.testing.assert_frame_equal(obs, exp, check_index_type=False)
예제 #5
0
파일: conftest.py 프로젝트: mabudz/pyam
def recursive_df(request):
    """Yield an IamDataFrame built from `RECURSIVE_DF` with two scenarios

    `request.param` selects the time format: "year" uses `RECURSIVE_DF` as
    is, any other value renames its columns via `DTS_MAPPING`. The scenario
    "scen_b" is derived from "scen_a" by doubling the internal `_data`.
    """
    if request.param == "year":
        data = RECURSIVE_DF
    else:
        data = RECURSIVE_DF.rename(DTS_MAPPING, axis="columns")

    combined = IamDataFrame(
        data, model="model_a", scenario="scen_a", region="World"
    )

    # derive the second scenario and scale its data before merging it back
    doubled = combined.rename(scenario={"scen_a": "scen_b"})
    doubled._data *= 2
    combined.append(doubled, inplace=True)

    yield combined
예제 #6
0
def test_aggregate_recursive(time_col):
    """Recursive aggregation restores variables removed from the data

    `time_col` selects the time format: "year" uses `RECURSIVE_DF` as is,
    any other value renames its columns via `DTS_MAPPING`.
    """
    # use the feature `recursive=True`
    data = (
        RECURSIVE_DF
        if time_col == "year"
        else RECURSIVE_DF.rename(DTS_MAPPING, axis="columns")
    )
    df = IamDataFrame(data, model="model_a", scenario="scen_a", region="World")

    # add a second scenario with doubled values; scale the internal `_data`
    # because `df2.data.value *= 2` only modifies the object returned by
    # `df2.data`, which leaves `df2` unchanged when `data` returns a new frame
    df2 = df.rename(scenario={"scen_a": "scen_b"})
    df2._data *= 2
    df.append(df2, inplace=True)

    # create object without variables to be aggregated
    v = "Secondary Energy|Electricity"
    agg_vars = [f"{v}{i}" for i in ["", "|Wind"]]
    df_minimal = df.filter(variable=agg_vars, keep=False)

    # return recursively aggregated data as new object
    obs = df_minimal.aggregate(variable=v, recursive=True)
    assert_iamframe_equal(obs, df.filter(variable=agg_vars))

    # append to `self`
    df_minimal.aggregate(variable=v, recursive=True, append=True)
    assert_iamframe_equal(df_minimal, df)
예제 #7
0
def test_aggregate_recursive(time_col):
    """Recursive aggregation restores variables removed from the data

    `time_col` selects the time format: 'year' uses `RECURSIVE_DF` as is,
    any other value renames its columns via `DTS_MAPPING`.
    """
    # use the feature `recursive=True`
    data = RECURSIVE_DF if time_col == 'year' \
        else RECURSIVE_DF.rename(DTS_MAPPING, axis='columns')
    df = IamDataFrame(data, model='model_a', scenario='scen_a', region='World')

    # add a second scenario with doubled values; scale the internal `_data`
    # because `df2.data.value *= 2` only modifies the object returned by
    # `df2.data`, which leaves `df2` unchanged when `data` returns a new frame
    df2 = df.rename(scenario={'scen_a': 'scen_b'})
    df2._data *= 2
    df.append(df2, inplace=True)

    # create object without variables to be aggregated
    v = 'Secondary Energy|Electricity'
    agg_vars = [f'{v}{i}' for i in ['', '|Wind']]
    df_minimal = df.filter(variable=agg_vars, keep=False)

    # return recursively aggregated data as new object
    obs = df_minimal.aggregate(variable=v, recursive=True)
    assert_iamframe_equal(obs, df.filter(variable=agg_vars))

    # append to `self`
    df_minimal.aggregate(variable=v, recursive=True, append=True)
    assert_iamframe_equal(df_minimal, df)