예제 #1
0
def test_concat_incompatible_cols(test_pd_df):
    """Check that concat raises when items have incompatible data dimensions.

    `df2` is built after an extra data column is added to the underlying
    pandas data, so its timeseries dimensions differ from `df1`'s.
    """
    df1 = IamDataFrame(test_pd_df)
    # mutating the source pandas object changes the data dimensions of df2 only
    test_pd_df["extra_col"] = "foo"
    df2 = IamDataFrame(test_pd_df)

    match = "Items have incompatible timeseries data dimensions"
    with pytest.raises(ValueError, match=match):
        concat([df1, df2])
예제 #2
0
def test_concat(test_df, reverse, iterable):
    """Concatenating two IamDataFrames merges timeseries data and meta."""
    renamed = test_df.filter(scenario="scen_b").rename(
        {"scenario": {"scen_b": "scen_c"}}
    )

    # attach meta indicators to both objects before concatenating
    test_df.set_meta([0, 1], name="col1")
    test_df.set_meta(["a", "b"], name="col2")
    renamed.set_meta(2, name="col1")
    renamed.set_meta("x", name="col3")

    items = [renamed, test_df] if reverse else [test_df, renamed]
    result = concat(iter(items) if iterable else items)

    # the original objects must not be modified by `concat`
    assert test_df.scenario == ["scen_a", "scen_b"]
    assert renamed.scenario == ["scen_c"]

    # merging of meta works as expected (columns reordered for comparison)
    pdt.assert_frame_equal(result.meta[EXP_META.columns], EXP_META)

    # appending data works as expected: renamed scenario mirrors the original
    ts = result.timeseries()
    npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values)
예제 #3
0
def test_concat_time_domain(test_pd_df, test_df_mixed, time, reverse):
    """Concatenating year- and datetime-domain data yields a mixed time domain."""
    df_year = IamDataFrame(
        test_pd_df[IAMC_IDX + [2005]], meta=test_df_mixed.meta
    )
    df_time = IamDataFrame(
        test_pd_df[IAMC_IDX + [2010]].rename({2010: time}, axis="columns")
    )

    # the order of concatenation should not affect the result
    first, second = (df_time, df_year) if reverse else (df_year, df_time)
    obs = concat([first, second])

    # the original objects must not be modified
    assert df_year.year == [2005]
    assert df_time.time == pd.Index([datetime(2010, 7, 21)])

    assert_iamframe_equal(obs, test_df_mixed)
예제 #4
0
def generate_combined_excel():
    """Combine the per-pathway CSV results into a single Excel file.

    Reads each ``<pathway>_combined.csv`` from ``DEF_OUTPUT_PATH``,
    concatenates them into one :class:`pyam.IamDataFrame` (keeping only the
    standard IAMC index plus year/value columns), and writes the result to
    ``GENeSYS-MOD-pathways.xlsx`` in the current working directory.
    """
    # build one IamDataFrame per pathway result file
    frames = [
        pyam.IamDataFrame(str(DEF_OUTPUT_PATH / (pathway.value + '_combined.csv')))
        for pathway in Pathways
    ]

    genesys = pyam.concat(frames)
    # rebuild from `data` restricted to IAMC columns, dropping any extra columns
    genesys = pyam.IamDataFrame(genesys.data[pyam.IAMC_IDX + ['year', 'value']])
    # plain literal: the original f-string had no placeholders (F541)
    genesys.to_excel('GENeSYS-MOD-pathways.xlsx')
예제 #5
0
def test_concat_with_pd_dataframe(test_df, reverse):
    """Concatenating an IamDataFrame with a plain `data` DataFrame skips meta."""
    renamed = test_df.filter(scenario="scen_b").rename(
        {"scenario": {"scen_b": "scen_c"}}
    )

    # merge with only the timeseries `data` DataFrame of `renamed`
    items = [renamed.data, test_df] if reverse else [test_df, renamed.data]
    result = concat(items)

    # the original object must not be modified
    assert test_df.scenario == ["scen_a", "scen_b"]

    # meta for the new scenario is initialized with defaults, not merged
    exp_meta = META_DF.copy()
    exp_meta.loc[("model_a", "scen_c"), "number"] = np.nan
    exp_meta["exclude"] = False
    pdt.assert_frame_equal(result.meta, exp_meta[["exclude"] + META_COLS])

    # appending data works as expected: new scenario mirrors the original rows
    ts = result.timeseries()
    npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values)
    def test_relationship_usage(self, test_db, test_downscale_df, add_col):
        """Infill follower data via a derived relationship and check the result.

        Derives a relationship between the lead and follow HFC variables,
        applies the resulting filler to the (time-adjusted) downscale data,
        and asserts the output matches the expected relabelled database data
        and can be appended back onto the input — including an extra data
        column when ``add_col`` is set.
        """
        tcruncher = self.tclass(test_db)
        lead = ["Emissions|HFC|C2F6"]
        follow = "Emissions|HFC|C5F12"
        # `filler` is a function that infers `follow` data from `lead` data
        filler = tcruncher.derive_relationship(follow, lead)

        # align the time axis of the input with the database
        test_downscale_df = self._adjust_time_style_to_match(
            test_downscale_df, test_db)
        if add_col:
            # add an extra data column and rebuild so pyam treats it as extra_cols
            add_col_val = "blah"
            test_downscale_df[add_col] = add_col_val
            test_downscale_df = IamDataFrame(test_downscale_df.data)
            assert test_downscale_df.extra_cols[0] == add_col
        res = filler(test_downscale_df)

        # expected output: the database follow data, relabelled to the
        # model/scenario names of the input scenarios
        scen_b_df = test_db.filter(variable="Emissions|HFC|C5F12")
        scen_c_df = test_db.filter(variable="Emissions|HFC|C5F12")
        scen_b_df["model"] = "model_b"
        scen_b_df["scenario"] = "scen_b"
        scen_c_df["model"] = "model_b"
        scen_c_df["scenario"] = "scen_c"
        if add_col:
            # mirror the extra column on the expected data, then rebuild
            scen_c_df[add_col] = add_col_val
            scen_c_df = IamDataFrame(scen_c_df.data)
            scen_b_df[add_col] = add_col_val
            scen_b_df = IamDataFrame(scen_b_df.data)
        exp = concat([scen_b_df, scen_c_df])

        pd.testing.assert_frame_equal(res.timeseries(),
                                      exp.timeseries(),
                                      check_like=True)

        # comes back on input timepoints
        np.testing.assert_array_equal(
            res.timeseries().columns.values.squeeze(),
            exp.timeseries().columns.values.squeeze(),
        )

        # Test we can append the answer to the original
        appended_df = test_downscale_df.filter(variable=lead).append(res)
        assert appended_df.filter(variable=follow).equals(res)
        if add_col:
            assert all(
                appended_df.filter(variable=follow)[add_col] == add_col_val)
예제 #7
0
def test_concat_non_default_index():
    """Merging two IamDataFrames with identical, non-standard index dimensions
    preserves that index."""
    columns = IAMC_IDX + ["version", 2005]
    index = META_IDX + ["version"]
    # two rows differing only in the `version` dimension (and value)
    row_v1 = ["model_a", "scenario_a", "region_a", "variable_a", "unit", 1, 1]
    row_v2 = ["model_a", "scenario_a", "region_a", "variable_a", "unit", 2, 2]

    df1 = IamDataFrame(pd.DataFrame([row_v1], columns=columns), index=index)
    df2 = IamDataFrame(pd.DataFrame([row_v2], columns=columns), index=index)
    exp = IamDataFrame(
        pd.DataFrame([row_v1, row_v2], columns=columns), index=index
    )

    assert_iamframe_equal(exp, concat([df1, df2]))
예제 #8
0
def test_concat_fails_iterable(arg, msg):
    """Check that calling concat with a non-iterable raises a TypeError."""
    expected = f"'{msg}' object is not iterable"
    with pytest.raises(TypeError, match=expected):
        concat(arg)
예제 #9
0
def test_concat_fails_empty():
    """Check that calling concat with an empty list raises a ValueError."""
    with pytest.raises(ValueError, match="No objects to concatenate"):
        concat([])
예제 #10
0
def test_concat_single_item(test_df):
    """Check that calling concat on a single-item list returns identical object"""
    assert_iamframe_equal(concat([test_df]), test_df)
예제 #11
0
def concat(*args):
    """Concatenate the given :class:`pyam.IamDataFrame` objects.

    Thin wrapper around :func:`pyam.concat` that takes the frames as
    positional arguments instead of a single iterable.
    """
    return pyam.concat(args)
예제 #12
0
			fill=1
				
		if file.find('Flow')>-1:
			print('flow')
			iamc_unit="MWh"	
			region=col
			iamc_variable=cfg['listfiles'][file]
			fill=1
		
		if fill==1:		
			iamc=pd.DataFrame({'Model':cfg['model'],'Scenario':scenario,'Region':region,'Variable':iamc_variable, 'Unit':iamc_unit,'time':cfg['T'],'value':data[col]})
			if i==0:
				big_iamc=iamc
				i=1
			else:
				big_iamc=pyam.concat(big_iamc,iamc)
			
# aggregate scenarios into their mean, if more than one scenario was run
if cfg['scenarios']>1 and cfg['aggregate scenarios']=='yes':
	
	# aggregate the scenarios
	###########################
	
	# get the underlying pandas DataFrame from the pyam IamDataFrame
	_data=big_iamc.data
	
	# group by scenario and take the mean across all scenarios
	# NOTE(review): mean() applies to every numeric column; presumably only
	# 'time' and 'value' are numeric here -- confirm against the upstream schema
	_meandata=_data.groupby('Scenario').mean().reset_index()
	# relabel with the aggregate scenario name and restore the IAMC column order
	_meandata['Scenario']=cfg['scenario']
	_meandata=_meandata[['Model','Scenario','Region','Variable','Unit','time','value']]
				
예제 #13
0
        def filler(in_iamdf):
            """
            Filler function derived from :obj:`RMSClosest`.

            Parameters
            ----------
            in_iamdf : :obj:`pyam.IamDataFrame`
                Input data to fill data in

            Returns
            -------
            :obj:`pyam.IamDataFrame`
                Filled in data (without original source data)

            Raises
            ------
            ValueError
                If there are any inconsistencies between the timeseries, units or
                expectations of the program and ``in_iamdf``, compared to the database
                used to generate this ``filler`` function.
            """
            # timeseries of the lead variable(s) present in the input data
            lead_var = in_iamdf.filter(variable=variable_leaders)

            # the lead data must be reported in exactly one unit, and that unit
            # must match the one the relationship was derived with
            var_units = lead_var["unit"].unique()
            if len(var_units) != 1:
                raise ValueError("More than one unit detected for input timeseries")

            var_units = var_units[0]
            if var_units != leader_unit:
                raise ValueError(
                    "Units of lead variable is meant to be {}, found {}".format(
                        leader_unit, var_units
                    )
                )

            # input and database must use the same time domain (year vs. datetime)
            if data_follower_time_col != in_iamdf.time_col:
                raise ValueError(
                    "`in_iamdf` time column must be the same as the time column used "
                    "to generate this filler function (`{}`)".format(
                        data_follower_time_col
                    )
                )

            # NOTE(review): the naming is crossed on purpose — the `_iamdf`
            # filter holds the *input's* timepoints (applied to the database
            # lead data below) and the `_lead` filter holds the *database's*
            # timepoints (applied to the input), restricting both sides to
            # their overlap
            key_timepoints_filter_iamdf = {
                data_follower_time_col: list(set(lead_var[data_follower_time_col]))
            }
            key_timepoints_filter_lead = {
                data_follower_time_col: list(set(iamdf_lead[data_follower_time_col]))
            }

            def get_values_at_key_timepoints(idf, time_filter):
                # filter warning about empty data frame as we handle it ourselves
                to_return = idf.filter(**time_filter)
                if to_return.data.empty:
                    raise ValueError(
                        "No time series overlap between the original and unfilled data"
                    )
                return to_return

            # timeseries restricted to the overlapping timepoints
            lead_var_timeseries = get_values_at_key_timepoints(
                lead_var, key_timepoints_filter_lead
            ).timeseries()
            iamdf_lead_timeseries = get_values_at_key_timepoints(
                iamdf_lead, key_timepoints_filter_iamdf
            ).timeseries()

            output_ts_list = []
            for label, row in lead_var_timeseries.iterrows():
                # database lead timeseries closest (by RMS) to this input row
                closest_ts = _select_closest(iamdf_lead_timeseries, row)

                # Filter to find the matching follow data for the same model, scenario
                # and region
                tmp = iamdf_follower.filter(
                    model=closest_ts["model"], scenario=closest_ts["scenario"]
                ).data

                # Update the model and scenario to match the elements of the input.
                tmp["model"] = label[lead_var_timeseries.index.names.index("model")]
                tmp["scenario"] = label[
                    lead_var_timeseries.index.names.index("scenario")
                ]
                output_ts_list.append(tmp)
                # NOTE(review): `tmp` is mutated *after* being appended; this
                # works because the list holds a reference to the same object,
                # but setting the extra columns before the append would be clearer
                if in_iamdf.extra_cols:
                    for col in in_iamdf.extra_cols:
                        tmp[col] = label[lead_var_timeseries.index.names.index(col)]
            return pyam.concat(output_ts_list)