def _validate(df: pyam.IamDataFrame) -> pyam.IamDataFrame:
    """Validate regions, variables and subannual timeslices of scenario data.

    Steps, in order:

    1. Load the definitions, including the 'subannual' dimension only when
       the data has a "subannual" dimension or a "time" column.
    2. Rename region synonyms (the "abbr" and "iso3" attributes of each
       region definition) to the region name. This modifies `df` in place.
    3. Validate the regions and variables against the definitions.
    4. If the data uses a "time" column, swap it for the year plus
       categorical "subannual" format.
    5. Check every "subannual" item that is not a defined timeslice: it must
       parse as "month-day hour:minute" with a UTC offset, and its timezone
       name or offset must match the expected one.

    Returns the validated data (a new object if the time column was swapped).

    Raises ValueError if a subannual item is not datetime-like, lacks a
    timezone, or has an unexpected timezone.
    """

    # load definitions (including 'subannual' if included in the scenario data)
    dimensions = ["region", "variable"]
    if "subannual" in df.dimensions or df.time_col == "time":
        dimensions.append("subannual")

    definition = DataStructureDefinition(
        here / "definitions", dimensions=dimensions
    )

    # map every region synonym to the region name it stands for
    synonym_to_region = {
        attributes[key]: name
        for name, attributes in definition.region.items()
        for key in ("abbr", "iso3")
        if key in attributes
    }
    df.rename(region=synonym_to_region, inplace=True)

    # check variables and regions
    definition.validate(df, dimensions=["region", "variable"])

    # convert to subannual format if data provided in datetime format
    if df.time_col == "time":
        logger.info(
            'Re-casting from "time" column to categorical "subannual" format')
        df = df.swap_time_for_year(subannual=OE_SUBANNUAL_FORMAT)

    # nothing left to check if the data has no subannual resolution
    if "subannual" not in df.dimensions:
        return df

    # items that are not defined timeslices must be datetime-like with timezone
    for item in df.subannual:
        if item in definition.subannual:
            continue

        # prefix an arbitrary year so that the item can be parsed as datetime
        stamp = f"2020-{item}"
        try:
            parsed = datetime.strptime(stamp, "%Y-%m-%d %H:%M%z")
        except ValueError:
            # distinguish "not a datetime at all" from "datetime without offset"
            try:
                datetime.strptime(stamp, "%Y-%m-%d %H:%M")
            except ValueError:
                raise ValueError(f"Invalid subannual timeslice: {item}")

            raise ValueError(f"Missing timezone: {item}")

        # parsing with timezone succeeded, either the name or the offset must match
        tz_matches = (
            parsed.tzname() == EXP_TZ
            or parsed.utcoffset() == EXP_TIME_OFFSET
        )
        if not tz_matches:
            raise ValueError(f"Invalid timezone: {item}")

    return df
Example #2
0
def test_cast_by_column_concat(meta_df):
    """Cast a table whose variable is given as two columns (`var_1`, `var_2`).

    The resulting IamDataFrame is compared against the `meta_df` fixture.
    """
    header = ['scenario', 'region', 'var_1', 'var_2', 'unit'] + TEST_DTS
    rows = [
        ['scen_a', 'World', 'Primary Energy', None, 'EJ/y', 1, 6.],
        ['scen_a', 'World', 'Primary Energy', 'Coal', 'EJ/y', 0.5, 3],
        ['scen_b', 'World', 'Primary Energy', None, 'EJ/y', 2, 7],
    ]
    raw = pd.DataFrame(rows, columns=header)

    obs = IamDataFrame(raw, model='model_a', variable=['var_1', 'var_2'])

    # align the time domain with the fixture if it uses a "year" column
    if "year" in meta_df.data.columns:
        obs = obs.swap_time_for_year()

    assert compare(meta_df, obs).empty
    pd.testing.assert_frame_equal(obs.data, meta_df.data, check_like=True)
Example #3
0
def test_cast_from_value_col(meta_df):
    """Cast a table that holds one value column per variable.

    The resulting IamDataFrame is compared against the `meta_df` fixture.
    """
    value_cols = ['Primary Energy', 'Primary Energy|Coal']
    index_cols = ['model', 'scenario', 'region', 'unit', 'time']
    rows = [
        ['model_a', 'scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
        ['model_a', 'scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
        ['model_a', 'scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
        ['model_a', 'scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None],
    ]
    wide = pd.DataFrame(rows, columns=index_cols + value_cols)

    obs = IamDataFrame(wide, value=value_cols)

    # align the time domain with the fixture if it uses a "year" column
    if "year" in meta_df.data.columns:
        obs = obs.swap_time_for_year()

    assert compare(meta_df, obs).empty
    pd.testing.assert_frame_equal(obs.data, meta_df.data, check_like=True)
Example #4
0
def test_cast_from_value_col_and_args(meta_df):
    """Cast from value columns with `model` and `region` given as arguments.

    Checks for issue #210 (https://github.com/IAMconsortium/pyam/issues/210).
    The resulting IamDataFrame is compared against the `meta_df` fixture.
    """
    value_cols = ['Primary Energy', 'Primary Energy|Coal']
    index_cols = ['scenario', 'iso', 'unit', 'time']
    rows = [
        ['scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
        ['scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
        ['scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
        ['scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None],
    ]
    wide = pd.DataFrame(rows, columns=index_cols + value_cols)

    # the model is passed as a value, the region as the name of a column
    obs = IamDataFrame(wide, model='model_a', region='iso', value=value_cols)

    # align the time domain with the fixture if it uses a "year" column
    if "year" in meta_df.data.columns:
        obs = obs.swap_time_for_year()

    assert compare(meta_df, obs).empty
    pd.testing.assert_frame_equal(obs.data, meta_df.data, check_like=True)
Example #5
0
def test_swap_time_to_year_subannual(test_pd_df, columns, subannual, dates,
                                     inplace):
    """Swap the time column for year, keeping the subannual part as a column.

    The year columns 2005 and 2010 of `test_pd_df` are first renamed to the
    two entries of `columns`. The swapped data is compared against
    `get_subannual_df(dates[0], dates[1])`, and swapping back must yield the
    renamed input data again.
    """
    # replace the year columns by the time columns under test (in place)
    column_map = {2005: columns[0], 2010: columns[1]}
    test_pd_df.rename(column_map, axis=1, inplace=True)

    # check swapping time for year
    df = IamDataFrame(test_pd_df)
    result = df.swap_time_for_year(subannual=subannual, inplace=inplace)

    # an in-place swap returns nothing, so the modified object is `df` itself
    if inplace:
        assert result is None
        result = df

    expected = get_subannual_df(dates[0], dates[1])
    assert_iamframe_equal(result, expected)

    # check that reverting using `swap_year_for_time` yields the original data
    reverted = result.swap_year_for_time()
    original = IamDataFrame(test_pd_df)
    assert_iamframe_equal(reverted, original)