def test_cast_with_model_arg(test_df):
    """Cast a frame whose model column is called ``foo`` via ``model="foo"``.

    The timeseries of ``test_df`` is flattened, its ``model`` column is
    renamed to ``foo`` and the result is cast again; the recast object must
    match ``test_df`` through ``compare`` and in its ``data``.
    """
    wide = test_df.timeseries().reset_index().rename(columns={"model": "foo"})
    recast = IamDataFrame(wide, model="foo")

    assert compare(test_df, recast).empty
    pd.testing.assert_frame_equal(recast.data, test_df.data)
def test_cast_from_value_col_and_args(test_df_year):
    """Cast from value columns while supplying ``model`` and ``region`` by argument.

    The input has no ``model`` column (passed as the literal ``"model_a"``)
    and stores the region in a column named ``iso``.
    """
    # checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
    value_columns = ["Primary Energy", "Primary Energy|Coal"]
    rows = [
        ["scen_a", "World", "EJ/yr", 2005, 1, 0.5],
        ["scen_a", "World", "EJ/yr", 2010, 6.0, 3],
        ["scen_b", "World", "EJ/yr", 2005, 2, None],
        ["scen_b", "World", "EJ/yr", 2010, 7, None],
    ]
    header = ["scenario", "iso", "unit", "year"] + value_columns
    source = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        source,
        model="model_a",
        region="iso",
        value=["Primary Energy", "Primary Energy|Coal"],
    )

    assert compare(test_df_year, cast).empty
    pd.testing.assert_frame_equal(cast.data, test_df_year.data, check_like=True)
def test_cast_with_model_arg(meta_df):
    """Cast a frame whose model column is called ``foo`` via ``model='foo'``.

    The recast object must match ``meta_df`` through ``compare`` and in its
    ``data``.
    """
    wide = meta_df.timeseries().reset_index().rename(columns={'model': 'foo'})
    recast = IamDataFrame(wide, model='foo')

    assert compare(meta_df, recast).empty
    pd.testing.assert_frame_equal(recast.data, meta_df.data)
def test_compare(meta_df):
    """``compare`` lists the rows that differ between a frame and a modified copy.

    The copy gets its first value overwritten with 2 and its coal variable
    renamed to gas, so the expected result holds one changed row plus the
    coal/gas rows that exist on one side only.
    """
    clone = copy.deepcopy(meta_df)
    value_pos = clone.data.columns.get_loc('value')
    clone.data.iloc[0, value_pos] = 2
    clone.rename(
        {'variable': {'Primary Energy|Coal': 'Primary Energy|Gas'}},
        inplace=True,
    )

    # NOTE(review): the labels are passed crossed relative to the positional
    # frames, and the expected columns below encode that - confirm against
    # compare() before "fixing" either side.
    obs = compare(meta_df, clone, right_label='meta_df', left_label='clone')

    expected_rows = [
        ['Primary Energy', 'EJ/y', 2005, 2, 1],
        ['Primary Energy|Coal', 'EJ/y', 2005, np.nan, 0.5],
        ['Primary Energy|Coal', 'EJ/y', 2010, np.nan, 3],
        ['Primary Energy|Gas', 'EJ/y', 2005, 0.5, np.nan],
        ['Primary Energy|Gas', 'EJ/y', 2010, 3, np.nan],
    ]
    exp = pd.DataFrame(
        expected_rows,
        columns=['variable', 'unit', 'year', 'meta_df', 'clone'],
    )
    exp = exp.assign(model='model_a', scenario='scen_a', region='World')
    exp = exp.set_index(IAMC_IDX + ['year'])

    pd.testing.assert_frame_equal(obs, exp)
def test_compare(meta_df):
    """``compare`` lists differing rows, for both year- and time-indexed fixtures.

    The expected frame is written with datetimes; when the fixture's data has
    a ``year`` column the datetimes are reduced to their year before indexing.
    """
    clone = copy.deepcopy(meta_df)
    value_pos = clone.data.columns.get_loc('value')
    clone.data.iloc[0, value_pos] = 2
    clone.rename(
        {'variable': {'Primary Energy|Coal': 'Primary Energy|Gas'}},
        inplace=True,
    )

    # NOTE(review): labels are passed crossed relative to the positional
    # frames; the expected columns below match that - confirm before changing.
    obs = compare(meta_df, clone, right_label='meta_df', left_label='clone')

    early = dt.datetime(2005, 6, 17)
    late = dt.datetime(2010, 7, 21)
    exp = pd.DataFrame(
        [
            ['Primary Energy', 'EJ/y', early, 2, 1],
            ['Primary Energy|Coal', 'EJ/y', early, np.nan, 0.5],
            ['Primary Energy|Coal', 'EJ/y', late, np.nan, 3],
            ['Primary Energy|Gas', 'EJ/y', early, 0.5, np.nan],
            ['Primary Energy|Gas', 'EJ/y', late, 3, np.nan],
        ],
        columns=['variable', 'unit', 'time', 'meta_df', 'clone'],
    )
    exp = exp.assign(model='model_a', scenario='scen_a', region='World')

    if 'year' in meta_df.data.columns:
        # Year-based fixture: collapse each timestamp to its year.
        exp['year'] = exp['time'].apply(lambda stamp: stamp.year)
        exp = exp.drop('time', axis='columns')
        time_col = 'year'
    else:
        time_col = 'time'

    exp = exp.set_index(IAMC_IDX + [time_col])
    pd.testing.assert_frame_equal(obs, exp)
def test_cast_with_variable_and_value(meta_df):
    """Cast with a literal ``variable`` and a custom-named value column.

    The 'Primary Energy' subset has its ``value`` column renamed to ``lvl``
    and its ``variable`` column dropped; both are then supplied as arguments.
    """
    pe_df = meta_df.filter(variable='Primary Energy')

    stripped = pe_df.data.rename(columns={'value': 'lvl'})
    stripped = stripped.drop('variable', axis=1)
    recast = IamDataFrame(stripped, variable='Primary Energy', value='lvl')

    assert compare(pe_df, recast).empty
    expected_data = pe_df.data.reset_index(drop=True)
    pd.testing.assert_frame_equal(recast.data, expected_data)
def test_swap_time_to_year(test_df, inplace):
    """``swap_time_for_year`` replaces the ``time`` column by a ``year`` column.

    With ``inplace`` the call must return ``None`` and modify ``test_df``;
    otherwise the swapped frame is returned and ``test_df`` keeps no ``year``
    column.
    """
    if "year" in test_df.data:
        return  # year df not relevant for this test

    expected_data = test_df.data.copy()
    expected_data["year"] = expected_data["time"].apply(lambda stamp: stamp.year)
    expected_data = expected_data.drop("time", axis="columns")
    exp = IamDataFrame(expected_data)

    obs = test_df.swap_time_for_year(inplace=inplace)

    if not inplace:
        assert compare(obs, exp).empty
        assert "year" not in test_df.data.columns
    else:
        assert obs is None
        assert compare(test_df, exp).empty
def test_cast_with_variable_and_value(test_df):
    """Cast with a literal ``variable`` and a custom-named value column.

    The "Primary Energy" subset has its ``value`` column renamed to ``lvl``
    and its ``variable`` column dropped; both are then supplied as arguments.
    """
    pe_df = test_df.filter(variable="Primary Energy")

    stripped = pe_df.data.rename(columns={"value": "lvl"})
    stripped = stripped.drop("variable", axis=1)
    recast = IamDataFrame(stripped, variable="Primary Energy", value="lvl")

    assert compare(pe_df, recast).empty
    expected_data = pe_df.data.reset_index(drop=True)
    pd.testing.assert_frame_equal(recast.data, expected_data)
def test_cast_from_r_df(test_pd_df):
    """Casting accepts year columns that carry an ``X`` prefix.

    The last two columns of the fixture are relabelled ``X<name>``; the cast
    result must equal the cast of the untouched fixture.
    """
    prefixed = test_pd_df.copy()

    # last two columns are years
    leading = list(prefixed.columns[:-2])
    year_like = ["X{}".format(col) for col in prefixed.columns[-2:]]
    prefixed.columns = leading + year_like

    obs = IamDataFrame(prefixed)
    exp = IamDataFrame(test_pd_df)

    assert compare(obs, exp).empty
    pd.testing.assert_frame_equal(obs.data, exp.data)
def test_cast_by_column_concat(meta_df):
    """Cast with ``variable`` given as a list of two columns (``var_1``, ``var_2``).

    The result must equal ``meta_df``, whose variables are 'Primary Energy'
    and 'Primary Energy|Coal' according to the rows below.
    """
    rows = [
        ['scen_a', 'World', 'Primary Energy', None, 'EJ/y', 1, 6.],
        ['scen_a', 'World', 'Primary Energy', 'Coal', 'EJ/y', 0.5, 3],
        ['scen_b', 'World', 'Primary Energy', None, 'EJ/y', 2, 7],
    ]
    header = ['scenario', 'region', 'var_1', 'var_2', 'unit', 2005, 2010]
    split_vars = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        split_vars, model='model_a', variable=['var_1', 'var_2']
    )

    assert compare(meta_df, cast).empty
    pd.testing.assert_frame_equal(cast.data, meta_df.data)
def test_cast_by_column_concat(test_df_year):
    """Cast with ``variable`` given as a list of two columns (``var_1``, ``var_2``).

    The result must equal ``test_df_year``; the data comparison ignores
    row/column ordering (``check_like=True``).
    """
    rows = [
        ["scen_a", "World", "Primary Energy", None, "EJ/yr", 1, 6.0],
        ["scen_a", "World", "Primary Energy", "Coal", "EJ/yr", 0.5, 3],
        ["scen_b", "World", "Primary Energy", None, "EJ/yr", 2, 7],
    ]
    header = ["scenario", "region", "var_1", "var_2", "unit", 2005, 2010]
    split_vars = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        split_vars, model="model_a", variable=["var_1", "var_2"]
    )

    assert compare(test_df_year, cast).empty
    pd.testing.assert_frame_equal(cast.data, test_df_year.data, check_like=True)
def test_cast_from_value_col(test_df_year):
    """Cast a frame that stores one column per variable via the ``value`` list."""
    rows = [
        ['model_a', 'scen_a', 'World', 'EJ/yr', 2005, 1, 0.5],
        ['model_a', 'scen_a', 'World', 'EJ/yr', 2010, 6., 3],
        ['model_a', 'scen_b', 'World', 'EJ/yr', 2005, 2, None],
        ['model_a', 'scen_b', 'World', 'EJ/yr', 2010, 7, None],
    ]
    header = [
        'model', 'scenario', 'region', 'unit', 'year',
        'Primary Energy', 'Primary Energy|Coal',
    ]
    source = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        source, value=['Primary Energy', 'Primary Energy|Coal']
    )

    assert compare(test_df_year, cast).empty
    pd.testing.assert_frame_equal(cast.data, test_df_year.data, check_like=True)
def test_cast_by_column_concat(meta_df):
    """Cast with ``variable`` as a two-column list, using ``TEST_DTS`` as time columns.

    When the fixture's data has a ``year`` column the cast result is first
    converted with ``swap_time_for_year`` before being compared.
    """
    header = ['scenario', 'region', 'var_1', 'var_2', 'unit'] + TEST_DTS
    rows = [
        ['scen_a', 'World', 'Primary Energy', None, 'EJ/y', 1, 6.],
        ['scen_a', 'World', 'Primary Energy', 'Coal', 'EJ/y', 0.5, 3],
        ['scen_b', 'World', 'Primary Energy', None, 'EJ/y', 2, 7],
    ]
    split_vars = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        split_vars, model='model_a', variable=['var_1', 'var_2']
    )
    if "year" in meta_df.data.columns:
        cast = cast.swap_time_for_year()

    assert compare(meta_df, cast).empty
    pd.testing.assert_frame_equal(cast.data, meta_df.data, check_like=True)
def test_cast_from_value_col_and_args(test_df_year):
    """Cast from value columns while supplying ``model`` and ``region`` by argument.

    The input has no ``model`` column (passed as the literal 'model_a') and
    stores the region in a column named ``iso``.
    """
    # checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
    rows = [
        ['scen_a', 'World', 'EJ/yr', 2005, 1, 0.5],
        ['scen_a', 'World', 'EJ/yr', 2010, 6., 3],
        ['scen_b', 'World', 'EJ/yr', 2005, 2, None],
        ['scen_b', 'World', 'EJ/yr', 2010, 7, None],
    ]
    header = [
        'scenario', 'iso', 'unit', 'year',
        'Primary Energy', 'Primary Energy|Coal',
    ]
    source = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        source,
        model='model_a',
        region='iso',
        value=['Primary Energy', 'Primary Energy|Coal'],
    )

    assert compare(test_df_year, cast).empty
    pd.testing.assert_frame_equal(cast.data, test_df_year.data, check_like=True)
def test_compare(test_df):
    """``compare`` lists differing rows, for both year- and time-indexed fixtures.

    The copy gets its first ``_data`` entry overwritten with 2 and its coal
    variable renamed to gas. For time-based fixtures the observed timestamps
    are normalized before the comparison.
    """
    clone = test_df.copy()
    clone._data.iloc[0] = 2
    clone.rename(
        variable={"Primary Energy|Coal": "Primary Energy|Gas"}, inplace=True
    )

    # NOTE(review): labels are passed crossed relative to the positional
    # frames; the expected columns below match that - confirm before changing.
    obs = compare(test_df, clone, right_label="test_df", left_label="clone")

    early = dt.datetime(2005, 6, 17)
    late = dt.datetime(2010, 7, 21)
    exp = pd.DataFrame(
        [
            ["Primary Energy", "EJ/yr", early, 2, 1],
            ["Primary Energy|Coal", "EJ/yr", early, np.nan, 0.5],
            ["Primary Energy|Coal", "EJ/yr", late, np.nan, 3],
            ["Primary Energy|Gas", "EJ/yr", early, 0.5, np.nan],
            ["Primary Energy|Gas", "EJ/yr", late, 3, np.nan],
        ],
        columns=["variable", "unit", "time", "test_df", "clone"],
    )
    exp = exp.assign(model="model_a", scenario="scen_a", region="World")

    if test_df.time_col == "year":
        # Year-based fixture: collapse each expected timestamp to its year.
        time_col = "year"
        exp["year"] = exp["time"].apply(lambda stamp: stamp.year)
        exp = exp.drop("time", axis="columns")
    else:
        # Time-based fixture: normalize the observed timestamps.
        time_col = "time"
        obs = obs.reset_index()
        obs["time"] = obs["time"].dt.normalize()
        obs = obs.set_index(IAMC_IDX + [time_col])

    exp = exp.set_index(IAMC_IDX + [time_col])
    pd.testing.assert_frame_equal(obs, exp)
def test_cast_from_value_col(meta_df):
    """Cast a frame with one column per variable and a ``time`` column from ``TEST_DTS``.

    When the fixture's data has a ``year`` column the cast result is first
    converted with ``swap_time_for_year`` before being compared.
    """
    first_dt, second_dt = TEST_DTS[0], TEST_DTS[1]
    rows = [
        ['model_a', 'scen_a', 'World', 'EJ/y', first_dt, 1, 0.5],
        ['model_a', 'scen_a', 'World', 'EJ/y', second_dt, 6., 3],
        ['model_a', 'scen_b', 'World', 'EJ/y', first_dt, 2, None],
        ['model_a', 'scen_b', 'World', 'EJ/y', second_dt, 7, None],
    ]
    header = [
        'model', 'scenario', 'region', 'unit', 'time',
        'Primary Energy', 'Primary Energy|Coal',
    ]
    source = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        source, value=['Primary Energy', 'Primary Energy|Coal']
    )
    if "year" in meta_df.data.columns:
        cast = cast.swap_time_for_year()

    assert compare(meta_df, cast).empty
    pd.testing.assert_frame_equal(cast.data, meta_df.data, check_like=True)
def test_compare(test_df):
    """``compare`` lists differing rows, for both year- and time-indexed fixtures.

    The copy gets its first ``_data`` entry overwritten with 2 and its coal
    variable renamed to gas. For time-based fixtures the observed timestamps
    are normalized before the comparison.
    """
    clone = test_df.copy()
    clone._data.iloc[0] = 2
    clone.rename(
        variable={'Primary Energy|Coal': 'Primary Energy|Gas'}, inplace=True
    )

    # NOTE(review): labels are passed crossed relative to the positional
    # frames; the expected columns below match that - confirm before changing.
    obs = compare(test_df, clone, right_label='test_df', left_label='clone')

    early = dt.datetime(2005, 6, 17)
    late = dt.datetime(2010, 7, 21)
    exp = pd.DataFrame(
        [
            ['Primary Energy', 'EJ/yr', early, 2, 1],
            ['Primary Energy|Coal', 'EJ/yr', early, np.nan, .5],
            ['Primary Energy|Coal', 'EJ/yr', late, np.nan, 3],
            ['Primary Energy|Gas', 'EJ/yr', early, 0.5, np.nan],
            ['Primary Energy|Gas', 'EJ/yr', late, 3, np.nan],
        ],
        columns=['variable', 'unit', 'time', 'test_df', 'clone'],
    )
    exp = exp.assign(model='model_a', scenario='scen_a', region='World')

    if test_df.time_col == 'year':
        # Year-based fixture: collapse each expected timestamp to its year.
        time_col = 'year'
        exp['year'] = exp['time'].apply(lambda stamp: stamp.year)
        exp = exp.drop('time', axis='columns')
    else:
        # Time-based fixture: normalize the observed timestamps.
        time_col = 'time'
        obs = obs.reset_index()
        obs['time'] = obs['time'].dt.normalize()
        obs = obs.set_index(IAMC_IDX + [time_col])

    exp = exp.set_index(IAMC_IDX + [time_col])
    pd.testing.assert_frame_equal(obs, exp)
def test_cast_from_value_col_and_args(meta_df):
    """Cast from value columns with ``model``/``region`` arguments and a ``time`` column.

    When the fixture's data has a ``year`` column the cast result is first
    converted with ``swap_time_for_year`` before being compared.
    """
    # checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
    first_dt, second_dt = TEST_DTS[0], TEST_DTS[1]
    rows = [
        ['scen_a', 'World', 'EJ/y', first_dt, 1, 0.5],
        ['scen_a', 'World', 'EJ/y', second_dt, 6., 3],
        ['scen_b', 'World', 'EJ/y', first_dt, 2, None],
        ['scen_b', 'World', 'EJ/y', second_dt, 7, None],
    ]
    header = [
        'scenario', 'iso', 'unit', 'time',
        'Primary Energy', 'Primary Energy|Coal',
    ]
    source = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        source,
        model='model_a',
        region='iso',
        value=['Primary Energy', 'Primary Energy|Coal'],
    )
    if "year" in meta_df.data.columns:
        cast = cast.swap_time_for_year()

    assert compare(meta_df, cast).empty
    pd.testing.assert_frame_equal(cast.data, meta_df.data, check_like=True)
def test_rename_duplicates():
    """Renaming onto an existing variable raises unless ``check_duplicates=False``.

    With the check disabled, the result must equal the expected frame built
    below.
    """
    mapping = {'variable': {'test_1': 'test_3'}}

    with pytest.raises(ValueError):
        RENAME_DF.rename(**mapping)

    obs = RENAME_DF.rename(check_duplicates=False, **mapping)

    expected_rows = [
        ['model', 'scen', 'region_a', 'test_2', 'unit', 2, 6],
        ['model', 'scen', 'region_a', 'test_3', 'unit', 4, 12],
        ['model', 'scen', 'region_b', 'test_3', 'unit', 4, 8],
    ]
    expected_frame = pd.DataFrame(expected_rows, columns=IAMC_IDX + [2005, 2010])
    exp = IamDataFrame(expected_frame)

    assert compare(obs, exp).empty
    pd.testing.assert_frame_equal(obs.data, exp.data)
def test_rename_duplicates():
    """Renaming onto an existing variable raises unless ``check_duplicates=False``.

    With the check disabled, the result must equal the expected frame built
    below.
    """
    mapping = {"variable": {"test_1": "test_3"}}

    with pytest.raises(ValueError):
        RENAME_DF.rename(**mapping)

    obs = RENAME_DF.rename(check_duplicates=False, **mapping)

    expected_rows = [
        ["model", "scen", "region_a", "test_2", "unit", 2, 6],
        ["model", "scen", "region_a", "test_3", "unit", 4, 12],
        ["model", "scen", "region_b", "test_3", "unit", 4, 8],
    ]
    expected_frame = pd.DataFrame(expected_rows, columns=IAMC_IDX + [2005, 2010])
    exp = IamDataFrame(expected_frame)

    assert compare(obs, exp).empty
    pd.testing.assert_frame_equal(obs.data, exp.data)
def test_swap_time_to_year(test_df, inplace):
    """``swap_time_for_year`` yields a year-based frame without a ``time`` attribute.

    With ``inplace`` the call must return ``None`` and the checks run against
    ``test_df`` itself; otherwise they run against the returned object.
    """
    if "year" in test_df.data:
        return  # year df not relevant for this test

    expected_data = test_df.data.copy()
    expected_data["year"] = expected_data["time"].apply(lambda stamp: stamp.year)
    expected_data = expected_data.drop("time", axis="columns")
    exp = IamDataFrame(expected_data)

    result = test_df.swap_time_for_year(inplace=inplace)
    if inplace:
        assert result is None
        swapped = test_df
    else:
        swapped = result

    assert compare(swapped, exp).empty
    assert swapped.year == [2005, 2010]
    # After the swap, accessing ``time`` must fail.
    with pytest.raises(AttributeError):
        swapped.time
def test_cast_from_value_col(test_df_year):
    """Cast a frame that stores one column per variable via the ``value`` list."""
    rows = [
        ["model_a", "scen_a", "World", "EJ/yr", 2005, 1, 0.5],
        ["model_a", "scen_a", "World", "EJ/yr", 2010, 6.0, 3],
        ["model_a", "scen_b", "World", "EJ/yr", 2005, 2, None],
        ["model_a", "scen_b", "World", "EJ/yr", 2010, 7, None],
    ]
    header = [
        "model",
        "scenario",
        "region",
        "unit",
        "year",
        "Primary Energy",
        "Primary Energy|Coal",
    ]
    source = pd.DataFrame(rows, columns=header)

    cast = IamDataFrame(
        source, value=["Primary Energy", "Primary Energy|Coal"]
    )

    assert compare(test_df_year, cast).empty
    pd.testing.assert_frame_equal(cast.data, test_df_year.data, check_like=True)
def _perform_crunch_and_check(
    required_variables,
    leaders,
    to_fill,
    df,
    type_of_cruncher,
    output_timesteps,
    to_fill_orig,
    check_data_returned=False,
    **kwargs,
):
    """
    Infill each required variable into ``to_fill`` and optionally verify the result.

    A cruncher is built from ``df``; for every entry of ``required_variables``
    the result of ``_infill_variable`` is appended to ``to_fill`` when it is
    truthy. If ``check_data_returned`` is set, the filled data is then checked
    for completeness and for consistency with ``to_fill_orig``.

    Parameters
    ----------
    required_variables : list[str]
        The variable names to infill

    leaders : list[str]
        The leaders to guide the infilling

    to_fill : IamDataFrame
        The data frame to infill

    df : IamDataFrame
        The data frame to base the infilling on

    type_of_cruncher : :obj: silicone cruncher
        the silicone package cruncher class to use for the infilling. It is
        called as ``type_of_cruncher(df)``.

    output_timesteps : list[int or datetime]
        When there should be data returned. Within this function these are
        only used by the optional checks, which assert that every filled
        model/scenario has data for each required variable at each of them.

    to_fill_orig : IamDataFrame
        The original, unfiltered and unaltered data input. We use this for
        performing checks.

    check_data_returned : bool
        If False (the default), return straight after infilling without
        running any of the checks.

    kwargs : Dict
        Any key word arguments to include in the cruncher calculation

    Returns
    -------
    :obj:IamDataFrame
        The infilled dataframe

    Raises
    ------
    AssertionError
        Only when ``check_data_returned`` is set: if a required variable is
        missing, contains nulls, lacks one of ``output_timesteps``, or if the
        filled data differs from ``to_fill_orig`` at the times they share.
    """
    cruncher = type_of_cruncher(df)
    for req_var in tqdm.tqdm(required_variables, desc="Filling required variables"):
        interpolated = _infill_variable(cruncher, req_var, leaders, to_fill, **kwargs)
        # Only truthy results are appended - presumably _infill_variable
        # returns None when nothing had to be filled; verify against it.
        if interpolated:
            to_fill = to_fill.append(interpolated)
    # Optionally check we have added all the required data
    if not check_data_returned:
        return to_fill
    # Completeness check: one pass per distinct (model, scenario) pair.
    for _, (model, scenario) in (to_fill[["model", "scenario"
                                          ]].drop_duplicates().iterrows()):
        msdf = to_fill.filter(model=model, scenario=scenario)
        for v in required_variables:
            msvdf = msdf.filter(variable=v)
            msvdf_data = msvdf.data
            assert not msvdf_data.isnull().any().any()
            assert not msvdf_data.empty
            if df.time_col == "year":
                assert all([
                    y in msvdf_data[df.time_col].values
                    for y in output_timesteps
                ]), "We do not have data for all required timesteps"
            else:
                # Convert the requested timesteps with pd.to_datetime before
                # testing membership in the time column's values.
                output_timesteps_datetime = pd.to_datetime(output_timesteps)
                assert all([
                    y in msvdf_data[df.time_col].values
                    for y in output_timesteps_datetime.values
                ]), "We do not have data for all required timesteps"
    # Check no data was overwritten by accident
    for model in tqdm.tqdm(to_fill_orig.models(),
                           desc="Consistency with original model data checks"):
        mdf = to_fill_orig.filter(model=model,
                                  variable=leaders + required_variables)
        for scenario in mdf.scenarios():
            msdf = mdf.filter(scenario=scenario)
            msdf_filled = to_fill.filter(model=model,
                                         scenario=scenario,
                                         variable=msdf["variable"].unique())
            # Restrict both frames to the times they share, so that only
            # overlapping data points are compared.
            common_times = set(msdf_filled[msdf.time_col]).intersection(
                msdf[msdf.time_col])
            if common_times:
                if msdf.time_col == "year":
                    msdf = msdf.filter(year=list(common_times))
                    msdf_filled = msdf_filled.filter(year=list(common_times))
                else:
                    msdf = msdf.filter(time=list(common_times))
                    msdf_filled = msdf_filled.filter(time=list(common_times))
                assert pyam.compare(msdf, msdf_filled).empty
    return to_fill