def test_concat_incompatible_cols(test_pd_df):
    """Check that concat raises if the items have incompatible data columns"""
    df1 = IamDataFrame(test_pd_df)
    # build the second item from a copy with an additional column, so that
    # the `test_pd_df` fixture itself is left untouched
    df2 = IamDataFrame(test_pd_df.assign(extra_col="foo"))

    match = "Items have incompatible timeseries data dimensions"
    with pytest.raises(ValueError, match=match):
        concat([df1, df2])
def test_concat(test_df, reverse, iterable):
    """Concatenate two objects (optionally reversed or passed as an iterator)
    and check the merged meta indicators and the appended timeseries data"""
    scenario_mapping = {"scenario": {"scen_b": "scen_c"}}
    other = test_df.filter(scenario="scen_b").rename(scenario_mapping)

    # give both objects partially overlapping meta columns
    test_df.set_meta([0, 1], name="col1")
    test_df.set_meta(["a", "b"], name="col2")
    other.set_meta(2, name="col1")
    other.set_meta("x", name="col3")

    items = [other, test_df] if reverse else [test_df, other]
    if iterable:
        items = iter(items)

    result = concat(items)

    # the inputs must be unchanged by the concatenation
    assert test_df.scenario == ["scen_a", "scen_b"]
    assert other.scenario == ["scen_c"]

    # the meta columns are merged (compare in the column order of EXP_META)
    merged_meta = result.meta[EXP_META.columns]
    pdt.assert_frame_equal(merged_meta, EXP_META)

    # the renamed scenario carries the same values as the row it was copied from
    timeseries = result.timeseries()
    npt.assert_array_equal(timeseries.iloc[2].values, timeseries.iloc[3].values)
def test_concat_time_domain(test_pd_df, test_df_mixed, time, reverse):
    """Concatenate an object with a year column (2005) and an object whose
    2010 column was renamed to `time`, and compare to `test_df_mixed`"""
    year_data = test_pd_df[IAMC_IDX + [2005]]
    df_year = IamDataFrame(year_data, meta=test_df_mixed.meta)

    time_data = test_pd_df[IAMC_IDX + [2010]].rename({2010: time}, axis="columns")
    df_time = IamDataFrame(time_data)

    # the order of the items depends on the `reverse` flag
    items = [df_time, df_year] if reverse else [df_year, df_time]
    obs = concat(items)

    # neither of the inputs may be changed by the concatenation
    assert df_year.year == [2005]
    assert df_time.time == pd.Index([datetime(2010, 7, 21)])

    assert_iamframe_equal(obs, test_df_mixed)
def generate_combined_excel():
    """Combine the per-pathway CSV files into a single Excel workbook.

    For every member of ``Pathways``, the file ``<value>_combined.csv`` in
    ``DEF_OUTPUT_PATH`` is read as a :class:`pyam.IamDataFrame`. The frames
    are concatenated, reduced to the IAMC index columns plus ``year`` and
    ``value``, and written to ``GENeSYS-MOD-pathways.xlsx``.
    """
    frames = [
        pyam.IamDataFrame(str(DEF_OUTPUT_PATH / (pathway.value + "_combined.csv")))
        for pathway in Pathways
    ]
    genesys = pyam.concat(frames)

    # re-create the object from the IAMC index, year and value columns only,
    # which discards any other column of the concatenated data
    genesys = pyam.IamDataFrame(genesys.data[pyam.IAMC_IDX + ["year", "value"]])
    genesys.to_excel("GENeSYS-MOD-pathways.xlsx")
def test_concat_with_pd_dataframe(test_df, reverse):
    """Concatenate an IamDataFrame with the plain `data` table of another
    object and check that only timeseries data (no meta) is taken from it"""
    scenario_mapping = {"scenario": {"scen_b": "scen_c"}}
    other = test_df.filter(scenario="scen_b").rename(scenario_mapping)

    # pass only the timeseries `data` table of `other`, in either position
    items = [other.data, test_df] if reverse else [test_df, other.data]
    result = concat(items)

    # the input object must be unchanged by the concatenation
    assert test_df.scenario == ["scen_a", "scen_b"]

    # the new scenario has no meta values, only the default `exclude` flag
    exp_meta = META_DF.copy()
    exp_meta.loc[("model_a", "scen_c"), "number"] = np.nan
    exp_meta["exclude"] = False
    expected_columns = ["exclude"] + META_COLS
    pdt.assert_frame_equal(result.meta, exp_meta[expected_columns])

    # the renamed scenario carries the same values as the row it was copied from
    timeseries = result.timeseries()
    npt.assert_array_equal(timeseries.iloc[2].values, timeseries.iloc[3].values)
def test_relationship_usage(self, test_db, test_downscale_df, add_col):
    """Derive a filler from `test_db`, apply it to `test_downscale_df` and
    compare the result to the follower data relabelled as model_b/scen_b
    and model_b/scen_c (optionally with an additional data column)"""
    lead = ["Emissions|HFC|C2F6"]
    follow = "Emissions|HFC|C5F12"
    tcruncher = self.tclass(test_db)
    filler = tcruncher.derive_relationship(follow, lead)

    test_downscale_df = self._adjust_time_style_to_match(test_downscale_df, test_db)
    if add_col:
        add_col_val = "blah"
        test_downscale_df[add_col] = add_col_val
        # re-initialise from the data table so the new column is registered
        test_downscale_df = IamDataFrame(test_downscale_df.data)
        assert test_downscale_df.extra_cols[0] == add_col

    res = filler(test_downscale_df)

    # build the expected result: one relabelled copy of the follower data
    # per target scenario, in the order scen_b, scen_c
    expected_parts = []
    for scenario_name in ("scen_b", "scen_c"):
        part = test_db.filter(variable="Emissions|HFC|C5F12")
        part["model"] = "model_b"
        part["scenario"] = scenario_name
        if add_col:
            part[add_col] = add_col_val
            part = IamDataFrame(part.data)
        expected_parts.append(part)
    exp = concat(expected_parts)

    pd.testing.assert_frame_equal(
        res.timeseries(), exp.timeseries(), check_like=True
    )

    # comes back on input timepoints
    observed_timepoints = res.timeseries().columns.values.squeeze()
    expected_timepoints = exp.timeseries().columns.values.squeeze()
    np.testing.assert_array_equal(observed_timepoints, expected_timepoints)

    # the result can be appended to the lead data of the input
    appended_df = test_downscale_df.filter(variable=lead).append(res)
    assert appended_df.filter(variable=follow).equals(res)

    if add_col:
        appended_follow = appended_df.filter(variable=follow)
        assert all(appended_follow[add_col] == add_col_val)
def test_concat_non_default_index():
    """Check that concatenating two objects with identical, non-standard
    index dimensions (here an added `version`) preserves that index"""
    row_version_1 = ["model_a", "scenario_a", "region_a", "variable_a", "unit", 1, 1]
    row_version_2 = ["model_a", "scenario_a", "region_a", "variable_a", "unit", 2, 2]

    def make_iamdf(rows):
        # every object uses the same columns and the `version` index dimension
        data = pd.DataFrame(rows, columns=IAMC_IDX + ["version", 2005])
        return IamDataFrame(data, index=META_IDX + ["version"])

    df1 = make_iamdf([row_version_1])
    df2 = make_iamdf([row_version_2])
    exp = make_iamdf([row_version_1, row_version_2])

    assert_iamframe_equal(exp, concat([df1, df2]))
def test_concat_fails_iterable(arg, msg):
    """Check that passing a non-iterable `arg` to concat raises a TypeError"""
    expected_error = f"'{msg}' object is not iterable"

    with pytest.raises(TypeError, match=expected_error):
        concat(arg)
def test_concat_fails_empty():
    """Check that calling concat with an empty list raises a ValueError"""
    with pytest.raises(ValueError, match="No objects to concatenate"):
        concat([])
def test_concat_single_item(test_df):
    """Check that concat of a one-element list equals that single element"""
    single_item = [test_df]
    obs = concat(single_item)

    assert_iamframe_equal(obs, test_df)
def concat(*args):
    """Pass the positional arguments as one tuple to :func:`pyam.concat`.

    Returns whatever :func:`pyam.concat` returns for that tuple.
    """
    items = args
    return pyam.concat(items)
fill=1 if file.find('Flow')>-1: print('flow') iamc_unit="MWh" region=col iamc_variable=cfg['listfiles'][file] fill=1 if fill==1: iamc=pd.DataFrame({'Model':cfg['model'],'Scenario':scenario,'Region':region,'Variable':iamc_variable, 'Unit':iamc_unit,'time':cfg['T'],'value':data[col]}) if i==0: big_iamc=iamc i=1 else: big_iamc=pyam.concat(big_iamc,iamc) # aggregate scenarios if more than 1 if cfg['scenarios']>1 and cfg['aggregate scenarios']=='yes': # aggregate the scenarios ###########################" # get pandas from iamc _data=big_iamc.data # group by scenarios and get mean _meandata=_data.groupby('Scenario').mean().reset_index() _meandata['Scenario']=cfg['scenario'] _meandata=_meandata[['Model','Scenario','Region','Variable','Unit','time','value']]
def filler(in_iamdf):
    """
    Filler function derived from :obj:`RMSClosest`.

    Parameters
    ----------
    in_iamdf : :obj:`pyam.IamDataFrame`
        Input data to fill data in

    Returns
    -------
    :obj:`pyam.IamDataFrame`
        Filled in data (without original source data)

    Raises
    ------
    ValueError
        If there are any inconsistencies between the timeseries, units or
        expectations of the program and ``in_iamdf``, compared to the database
        used to generate this ``filler`` function. This includes the case
        where the input and the database share no timepoints.
    """
    lead_var = in_iamdf.filter(variable=variable_leaders)

    var_units = lead_var["unit"].unique()
    if len(var_units) != 1:
        raise ValueError("More than one unit detected for input timeseries")

    var_units = var_units[0]
    if var_units != leader_unit:
        raise ValueError(
            "Units of lead variable is meant to be {}, found {}".format(
                leader_unit, var_units
            )
        )

    if data_follower_time_col != in_iamdf.time_col:
        raise ValueError(
            "`in_iamdf` time column must be the same as the time column used "
            "to generate this filler function (`{}`)".format(
                data_follower_time_col
            )
        )

    # Each filter holds the timepoints of one side and is applied to the
    # *other* side below, so that both are reduced to shared timepoints.
    key_timepoints_filter_iamdf = {
        data_follower_time_col: list(set(lead_var[data_follower_time_col]))
    }
    key_timepoints_filter_lead = {
        data_follower_time_col: list(set(iamdf_lead[data_follower_time_col]))
    }

    def get_values_at_key_timepoints(idf, time_filter):
        # an empty result is handled here by raising an explicit error
        to_return = idf.filter(**time_filter)
        if to_return.data.empty:
            raise ValueError(
                "No time series overlap between the original and unfilled data"
            )
        return to_return

    lead_var_timeseries = get_values_at_key_timepoints(
        lead_var, key_timepoints_filter_lead
    ).timeseries()
    iamdf_lead_timeseries = get_values_at_key_timepoints(
        iamdf_lead, key_timepoints_filter_iamdf
    ).timeseries()

    # The positions of the index levels do not change between rows, so look
    # them up once instead of once per row.
    index_names = list(lead_var_timeseries.index.names)
    model_position = index_names.index("model")
    scenario_position = index_names.index("scenario")
    extra_col_positions = [
        (col, index_names.index(col)) for col in (in_iamdf.extra_cols or [])
    ]

    output_ts_list = []
    for label, row in lead_var_timeseries.iterrows():
        closest_ts = _select_closest(iamdf_lead_timeseries, row)

        # Find the follow data of the closest timeseries' model and scenario
        tmp = iamdf_follower.filter(
            model=closest_ts["model"], scenario=closest_ts["scenario"]
        ).data

        # Relabel the data so that it matches the row of the input, including
        # the values of any extra columns. The table is completed *before* it
        # is added to the output list.
        tmp["model"] = label[model_position]
        tmp["scenario"] = label[scenario_position]
        for col, position in extra_col_positions:
            tmp[col] = label[position]

        output_ts_list.append(tmp)

    return pyam.concat(output_ts_list)