Exemple #1
0
def _filter_for_overlap(df1, df2, cols):
    """
    Restrict two datasets to the rows whose ``cols`` values occur in both.

    Parameters
    ----------
    df1 : :obj:`pyam.IamDataFrame`
        The first dataset (order is irrelevant). Only its ``.data`` attribute,
        a long-format :obj:`pd.DataFrame`, is read.

    df2 : :obj:`pyam.IamDataFrame`
        The second dataset (order is irrelevant), read in the same way.

    cols : list[str]
        Columns whose combined values must be identical between the two
        datasets for a row to be kept.

    Returns
    -------
    (:obj:`pyam.IamDataFrame`, :obj:`pyam.IamDataFrame`)
        The two datasets in the order they were put in, each reduced to the
        rows whose ``cols`` values are also found in the other one.

    Raises
    ------
    ValueError
        If no combination of ``cols`` values is present in both datasets.
    """
    lead_data = df1.data.set_index(cols)
    follow_data = df2.data.set_index(cols)
    # Boolean masks keep every matching row exactly once. Selecting with
    # ``.loc[list_of_labels]`` would repeat rows whenever a label occurs more
    # than once in the index built from ``cols``.
    lead_mask = lead_data.index.isin(follow_data.index)
    if not lead_mask.any():
        raise ValueError(
            "No model/scenario overlap between leader and follower data"
        )
    follow_mask = follow_data.index.isin(lead_data.index)
    return (
        pyam.IamDataFrame(lead_data[lead_mask]),
        pyam.IamDataFrame(follow_data[follow_mask]),
    )
Exemple #2
0
def generate_combined_excel():
    """Merge the combined CSV of every pathway into one Excel workbook.

    For each member of ``Pathways`` the file ``<value>_combined.csv`` is read
    from ``DEF_OUTPUT_PATH``. The frames are concatenated, reduced to the IAMC
    index columns plus ``year`` and ``value``, and written to
    ``GENeSYS-MOD-pathways.xlsx`` (a relative path).
    """
    frames = [
        pyam.IamDataFrame(
            str(DEF_OUTPUT_PATH / (pathway.value + '_combined.csv')))
        for pathway in Pathways
    ]

    merged = pyam.concat(frames)
    wanted_columns = pyam.IAMC_IDX + ['year', 'value']
    trimmed = pyam.IamDataFrame(merged.data[wanted_columns])
    trimmed.to_excel('GENeSYS-MOD-pathways.xlsx')
Exemple #3
0
def get_sr15_scenarios(output_file, valid_model_ids):
    """
    Collects world-level data from the IIASA database for the named models and
    saves them to a given location.

    Parameters
    ----------
    output_file : str
        File name and location for data to be saved

    valid_model_ids : list[str]
        Names of models that are to be fetched.

    Raises
    ------
    ValueError
        If ``valid_model_ids`` is empty, so that there is nothing to write.
    """
    conn = pyam.iiasa.Connection("IXSE_SR15")
    variables_to_fetch = ["Emissions*"]
    # Explicit "nothing fetched yet" marker. Relying on ``NameError`` to detect
    # the first iteration would also hide genuine NameErrors raised while
    # appending.
    df = None
    for model in valid_model_ids:
        print("Fetching data for {}".format(model))
        for variable in variables_to_fetch:
            print("Fetching {}".format(variable))
            var_df = conn.query(model=model, variable=variable, region="World")
            if df is None:
                df = pyam.IamDataFrame(var_df)
            else:
                df.append(var_df, inplace=True)

    if df is None:
        raise ValueError("No models were requested, so there is no data to save")

    print("Writing to {}".format(output_file))
    df.to_csv(output_file)
 def test_infillallrequiredvariables_check_results_interp_times(
         self, test_db, additional_cols):
     """Check that infilling gives valid results at interpolated times."""
     if additional_cols:
         # Add the extra column(s) filled with zeros and rebuild the database.
         test_db.data[additional_cols] = 0
         test_db = pyam.IamDataFrame(test_db.data)

     leader = ["Emissions|HFC|C2F6"]
     required_variables_list = ["Emissions|HFC|C5F12"]

     # One output time, expressed in whichever time format test_db uses.
     # NOTE(review): the datetime case uses 2012-06-14 10:00; the original note
     # attributes the offset to a leap year - confirm against the fixture.
     output_times = (
         [2012]
         if test_db.time_col == "year"
         else [pd.Timestamp(year=2012, day=14, month=6, hour=10)]
     )

     infilled = infill_all_required_variables(
         test_db.filter(variable=leader),
         test_db,
         leader,
         required_variables_list,
         check_data_returned=True,
         output_timesteps=output_times,
     )

     # The values should be interpolations between the known values at the start
     first_expected = (3 * 0.5 + 2 * 1.5) / 5
     second_expected = (3 * 2 + 2 * 3) / 5
     assert np.isclose(infilled.data["value"][0], first_expected, atol=1e-5)
     assert np.isclose(infilled.data["value"][1], second_expected, atol=1e-5)
Exemple #5
0
def test_convert_units_to_mtco2_equiv_fails_with_month_units(
        check_aggregate_df):
    """A per-month unit is rejected because ``mo`` is not a defined unit."""
    limited_check_agg = check_aggregate_df.filter(variable="Primary Energy*",
                                                  keep=False)
    # Write the new unit with a single ``.iloc`` call on an explicit copy. The
    # chained form ``data["unit"].iloc[0] = ...`` may modify a temporary
    # object, in which case the edit would be silently lost.
    data = limited_check_agg.data.copy()
    data.iloc[0, data.columns.get_loc("unit")] = "Mt CH4/mo"
    limited_check_agg = pyam.IamDataFrame(data)
    err_msg = "'mo' is not defined in the unit registry"
    with pytest.raises(UndefinedUnitError, match=err_msg):
        convert_units_to_MtCO2_equiv(limited_check_agg)
Exemple #6
0
def test_convert_units_to_mtco2_equiv_fails_with_oom_units(check_aggregate_df):
    """A ``Tt CO2`` unit cannot be converted and raises ``ValueError``."""
    limited_check_agg = check_aggregate_df.filter(variable="Primary Energy*",
                                                  keep=False)
    # Write the new unit with a single ``.iloc`` call on an explicit copy. The
    # chained form ``data["unit"].iloc[0] = ...`` may modify a temporary
    # object, in which case the edit would be silently lost.
    data = limited_check_agg.data.copy()
    data.iloc[0, data.columns.get_loc("unit")] = "Tt CO2"
    limited_check_agg = pyam.IamDataFrame(data)
    # The message contains parentheses, so it is escaped before being used as
    # a regular expression by ``pytest.raises``.
    err_msg = re.escape(
        "Cannot convert from Tt CO2 (cleaned is: Tt CO2) to Mt CO2-equiv/yr (cleaned is: Mt CO2/yr)"
    )
    with pytest.raises(ValueError, match=err_msg):
        convert_units_to_MtCO2_equiv(limited_check_agg)
Exemple #7
0
def generate_idataframe_renewable_series(data_wrapper):
    """Stack every frame in ``data_wrapper.transformed_data`` into one object.

    The entries are looked up key by key, concatenated with ``pd.concat`` and
    wrapped in a ``pyam.IamDataFrame``, which is returned.
    """
    logging.info('Executing: generate_idataframe')
    stacked = pd.concat([
        data_wrapper.transformed_data[key]
        for key in data_wrapper.transformed_data
    ])
    return pyam.IamDataFrame(stacked)
Exemple #8
0
def test_stackplot_missing_zero_issue_266(plot_stackplot_df):
    """Draw a stack plot of four variables with mixed-sign values (issue 266).

    The ``plot_stackplot_df`` fixture is requested but not used in the body.
    """
    rows = [
        ['a', 1, 2, 3, 4],
        ['b', 0, 1, 2, 3],
        ['c', -1, 1, -1, -1],
        ['d', 1, 1, 1, -1],
    ]
    wide = pd.DataFrame(rows, columns=['variable', 2010, 2020, 2030, 2040])
    index_values = dict(model='model_a', scenario='scen_a', region='World',
                        unit='some_unit')
    df = pyam.IamDataFrame(wide, **index_values)

    fig, ax = plt.subplots(figsize=(8, 8))
    df.plot.stack(ax=ax)
    return fig
Exemple #9
0
def _adjust_time_style_to_match(in_df, target_df):
    """Return ``in_df`` expressed in the same time format as ``target_df``.

    If both objects already share a ``time_col`` the input is returned
    unchanged. Otherwise the timeseries columns are converted:

    * target uses ``"time"``: each year becomes the target's own timestamp for
      that year when it has one, else 1 January of that year;
    * otherwise: each timestamp is replaced by its ``.year``.
    """
    if in_df.time_col == target_df.time_col:
        return in_df

    wide = in_df.timeseries()
    if target_df.time_col == "time":
        stamp_for_year = {
            stamp.year: stamp for stamp in target_df.timeseries().columns
        }

        def _year_to_stamp(year):
            # Prefer the exact timestamp the target uses for this year.
            if year in stamp_for_year:
                return stamp_for_year[year]
            return dt.datetime(year, 1, 1)

        wide.columns = wide.columns.map(_year_to_stamp)
    else:
        wide.columns = wide.columns.map(lambda stamp: stamp.year)
    return pyam.IamDataFrame(wide)
Exemple #10
0
def _combine_data(input_file,
                  generate_series_data: bool = False,
                  generate_load_factors: bool = False):
    """Combine the per-type CSV outputs for ``input_file`` into one CSV.

    ``<input_file>_yearly.csv`` is always read from ``DEF_OUTPUT_PATH``. The
    ``_loadfactors.csv`` and ``_series.csv`` files are appended (in that
    order) only when the matching flag is set. The result is written to
    ``<input_file>_combined.csv`` in the same directory.
    """

    def _read(suffix):
        # Load one of the sibling output files as an IamDataFrame.
        return pyam.IamDataFrame(str(DEF_OUTPUT_PATH / (input_file + suffix)))

    combined = _read('_yearly.csv')

    if generate_load_factors:
        combined = combined.append(_read('_loadfactors.csv'))

    if generate_series_data:
        combined = combined.append(_read('_series.csv'))

    combined.to_csv(DEF_OUTPUT_PATH / (input_file + "_combined.csv"))
Exemple #11
0
def test_line_color_fill_between_interpolate(plot_df):
    """Line plot with ``fill_between`` on data that has a mid-range gap.

    Designed to create the sawtooth behavior at a midpoint with missing data.
    """
    df = pyam.IamDataFrame(plot_df.data.copy())
    fig, ax = plt.subplots(figsize=(8, 8))
    row_head = ['test_model1', 'test_scenario1', 'World',
                'Primary Energy|Coal', 'EJ/y']
    # The row label is recomputed from the current length on every pass:
    # offset -1 overwrites the last existing row, 0 appends a new row, and +1
    # writes one label past the (by then longer) end.
    for offset, year in ((-1, 2010), (0, 2012), (1, 2015)):
        df.data.loc[len(df.data) + offset] = row_head + [year, 3.50]
    df.line_plot(ax=ax, color='model', fill_between=True, legend=True)
    return fig
Exemple #12
0
def test_barplot_stacked_net_line(plot_df):
    """Stacked bar plot with net-value lines, including negative values."""
    fig, ax = plt.subplots(figsize=(8, 8))
    df = pyam.IamDataFrame(plot_df.data.copy())
    # Explicitly add negative contributions for net lines.
    row_head = ['test_model1', 'test_scenario1', 'World',
                'Primary Energy|foo', 'EJ/y']
    extra_points = ((2005, 0.35), (2010, -1.0), (2015, -4.0))
    for offset, (year, value) in enumerate(extra_points):
        df.data.loc[len(df) + offset] = row_head + [year, value]
    selection = df.filter(variable='Primary Energy|*', model='test_model1',
                          scenario='test_scenario1', region='World')
    selection.plot.bar(ax=ax, bars='variable', stacked=True)
    plotting.add_net_values_to_bar_plot(ax, color='r')
    return fig
Exemple #13
0
def test_find_matching_scenarios_dual_region():
    """``find_matching_scenarios`` asserts when given a second region.

    A single row for the region ``"Country"`` is added to the data of
    ``simple_df`` and the call is expected to raise ``AssertionError``.
    """
    extra_region_row = pd.DataFrame(
        [[_mc, _sa, "Country", _eco2, _gtc, 2010, 2]],
        columns=_msrvu + [simple_df.time_col, "value"],
    )
    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks the
    # rows in the same way and keeps the original index labels.
    multiregion_df = pd.concat([simple_df.data, extra_region_row])
    multiregion_df = pyam.IamDataFrame(multiregion_df)
    with pytest.raises(AssertionError):
        find_matching_scenarios(
            df_to_test,
            multiregion_df,
            variable_follower,
            variable_leaders,
            ["right_scenario", "wrong_scenario", "scen_a", "scen_b"],
            return_all_info=True,
        )
Exemple #14
0
def test_stackplot_negative():
    """Stack plot of data holding both positive and negative values."""
    rows = [
        ['var1', 1, -1, 0],
        ['var2', 1, 1, -2],
        ['var3', -1, 1, 1],
    ]
    wide = pd.DataFrame(rows, columns=['variable', 2005, 2010, 2015])
    fig, ax = plt.subplots(figsize=(8, 8))
    index_values = dict(model='model_a', scenario='scen_a', region='World',
                        unit='foo')
    df = pyam.IamDataFrame(wide, **index_values)
    df.plot.stack(ax=ax, total=True)
    return fig
Exemple #15
0
def download_or_load_sr15(filename, valid_model_ids="*"):
    """
    Load SR1.5 data from ``filename``, downloading it first if it is missing.

    Parameters
    ----------
    filename : str
        File in which to look for the data, and to which it is saved when it
        has to be downloaded.
    valid_model_ids : str
        Model selection. It is passed to ``get_sr15_scenarios`` when
        downloading and used as the ``model`` filter on the loaded data.

    Returns
    -------
    :obj:`pyam.IamDataFrame`
        The loaded data, filtered to ``valid_model_ids``.
    """
    file_present = os.path.isfile(filename)
    if not file_present:
        get_sr15_scenarios(filename, valid_model_ids)
    loaded = pyam.IamDataFrame(filename)
    return loaded.filter(model=valid_model_ids)
Exemple #16
0
    def _construct_consistent_values(self, aggregate_name, components,
                                     db_to_generate):
        """
        Sum the component variables into a new variable ``aggregate_name``.

        Parameters
        ----------
        aggregate_name : str
            The name of the aggregate variable. It must not already be present
            in ``db_to_generate``.

        components : [str]
            List of the names of the variables to be summed.

        db_to_generate : :obj:`pyam.IamDataFrame`
            Input data from which to construct consistent values.

        Return
        ------
        :obj:`pyam.IamDataFrame`
            The sum of the components per model, scenario, region and time,
            stored under the variable ``aggregate_name``.

        Raises
        ------
        ValueError
            If none of the components are present, or if their units differ by
            more than an ``-equiv`` suffix.
        """
        assert (aggregate_name not in db_to_generate.variables().values
                ), "We already have a variable of this name"
        component_db = db_to_generate.filter(variable=components)
        sorted_units = (
            component_db.data["unit"].drop_duplicates().sort_values()
        )
        # Units that differ only by "-equiv" are treated as the same unit.
        distinct_units = sorted_units.map(
            lambda unit: unit.replace("-equiv", "")
        ).drop_duplicates()
        unit_count = len(distinct_units)
        if unit_count == 0:
            raise ValueError(
                "Attempting to construct a consistent {} but none of the components "
                "present".format(aggregate_name))
        if unit_count > 1:
            raise ValueError(
                "Too many units found to make a consistent {}".format(
                    aggregate_name))
        group_columns = ["model", "scenario", "region", component_db.time_col]
        summed = component_db.data.groupby(group_columns).agg("sum")
        summed = summed.reset_index()
        # The units were sorted above and the first one is used; per the
        # original note this is what selects the "-equiv" spelling.
        summed["unit"] = sorted_units.iloc[0]
        summed["variable"] = aggregate_name
        return pyam.IamDataFrame(summed)
Exemple #17
0
def test_stackplot_negative():
    """Stack plot of data holding both positive and negative values."""
    rows = [
        ["var1", 1, -1, 0],
        ["var2", 1, 1, -2],
        ["var3", -1, 1, 1],
    ]
    wide = pd.DataFrame(rows, columns=["variable", 2005, 2010, 2015])
    fig, ax = plt.subplots(figsize=(8, 8))
    index_values = dict(
        model="model_a", scenario="scen_a", region="World", unit="foo"
    )
    df = pyam.IamDataFrame(wide, **index_values)
    df.plot.stack(ax=ax, total=True)
    return fig
Exemple #18
0
def test_stackplot_missing_zero_issue_266(plot_stackplot_df):
    """Draw a stack plot of four variables with mixed-sign values (issue 266).

    The ``plot_stackplot_df`` fixture is requested but not used in the body.
    """
    rows = [
        ["a", 1, 2, 3, 4],
        ["b", 0, 1, 2, 3],
        ["c", -1, 1, -1, -1],
        ["d", 1, 1, 1, -1],
    ]
    wide = pd.DataFrame(rows, columns=["variable", 2010, 2020, 2030, 2040])
    index_values = dict(
        model="model_a", scenario="scen_a", region="World", unit="some_unit"
    )
    df = pyam.IamDataFrame(wide, **index_values)

    fig, ax = plt.subplots(figsize=(8, 8))
    df.plot.stack(ax=ax)
    return fig
def get_sr15_scenarios(output_file):
    """
    Fetch emissions data for every model of the ``iamc15`` database and save it.

    Models whose query fails are reported on stdout and skipped.

    Parameters
    ----------
    output_file : str
        File name and location for data to be saved

    Raises
    ------
    ValueError
        If no query succeeded, so that there is nothing to write.
    """
    conn = pyam.iiasa.Connection("iamc15")
    variables_to_fetch = ["Emissions*"]
    # Explicit "nothing fetched yet" marker. Relying on ``NameError`` to detect
    # the first successful query would also hide genuine NameErrors raised
    # while appending.
    df = None
    for model in conn.models():
        print("Fetching data for {}".format(model))
        for variable in variables_to_fetch:
            print("Fetching {}".format(variable))
            try:
                var_df = conn.query(model=model, variable=variable)
            except Exception as e:
                # Deliberately best-effort: one failing model must not abort
                # the whole download.
                print("Failed for {}".format(model))
                print(str(e))
                continue

            if df is None:
                df = pyam.IamDataFrame(var_df)
            else:
                df.append(var_df, inplace=True)

    if df is None:
        raise ValueError("No data could be fetched, so there is nothing to save")

    print("Writing to {}".format(output_file))
    df.to_csv(output_file)
Exemple #20
0
def test_line_PYAM_COLORS(plot_df):
    """Plot one line per entry of ``plotting.PYAM_COLORS``.

    Each color gets its own copy of a single timeseries, renamed to a model
    called after the color and shifted by a small offset, so that the lines
    are drawn as a family.
    """
    base = plot_df.filter(
        model='test_model',
        variable='Primary Energy',
        scenario='test_scenario1',
    ).data.copy()
    color_by_model = {}
    shifted_frames = []
    for offset, color in enumerate(plotting.PYAM_COLORS):
        frame = base.copy()
        frame['model'] = color
        frame['value'] += offset
        # The model name doubles as the color it is mapped to.
        color_by_model[color] = color
        shifted_frames.append(frame)
    update = {'color': {'model': color_by_model}}
    df = pyam.IamDataFrame(pd.concat(shifted_frames))
    fig, ax = plt.subplots(figsize=(8, 8))
    with update_run_control(update):
        df.line_plot(ax=ax, color='model', legend=True)
    return fig
Exemple #21
0
def test_store_ts(request, caplog, test_mp):
    """``store_ts`` writes both a DataFrame and an IamDataFrame to a scenario.

    The ``request`` and ``caplog`` fixtures are requested but not used in the
    body.
    """
    computer = Computer()

    # A new, empty scenario that receives the time series data
    model_name = __name__
    scenario_name = "test scenario"
    target = Scenario(test_mp, model_name, scenario_name, version="new")
    target.commit("Empty scenario")
    computer.add("target", target)

    # First input: a plain pd.DataFrame
    frame_foo = test_data[0].assign(variable="Foo")
    computer.add("input 1", frame_foo)

    # Second input: the same kind of data wrapped in a pyam.IamDataFrame
    frame_bar = test_data[2050].assign(variable="Bar")
    computer.add("input 2", pyam.IamDataFrame(frame_bar))

    # Task that stores both inputs on the target scenario
    computer.add("test 1", store_ts, "target", "input 1", "input 2")

    # Nothing is stored before the task runs
    assert 0 == len(target.timeseries())

    # The computation runs successfully
    computer.get("test 1")

    # All rows from both inputs are present
    assert len(frame_foo) + len(frame_bar) == len(target.timeseries())

    # The stored data equals the input, with the `model` and `scenario`
    # columns filled automatically.
    identity = dict(model=model_name, scenario=scenario_name)
    assert_frame_equal(frame_foo.assign(**identity),
                       target.timeseries(variable="Foo"))
    assert_frame_equal(frame_bar.assign(**identity),
                       target.timeseries(variable="Bar"))
Exemple #22
0
def test_line_PYAM_COLORS(plot_df):
    """Plot one line per entry of ``plotting.PYAM_COLORS``.

    Each color gets its own copy of a single timeseries, renamed to a model
    called after the color and shifted by a small offset, so that the lines
    are drawn as a family.
    """
    base = plot_df.filter(
        model="test_model",
        variable="Primary Energy",
        scenario="test_scenario1",
    ).data.copy()
    color_by_model = {}
    shifted_frames = []
    for offset, color in enumerate(plotting.PYAM_COLORS):
        frame = base.copy()
        frame["model"] = color
        frame["value"] += offset
        # The model name doubles as the color it is mapped to.
        color_by_model[color] = color
        shifted_frames.append(frame)
    update = {"color": {"model": color_by_model}}
    df = pyam.IamDataFrame(pd.concat(shifted_frames))
    fig, ax = plt.subplots(figsize=(8, 8))
    with update_run_control(update):
        df.plot(ax=ax, color="model", legend=True)
    return fig
 def test_relationship_ignores_incomplete_data(self, larger_df, test_db):
     """Infilling components still returns data when the input is inconsistent.

     Every row of ``test_db`` is relabelled as the aggregate variable, the
     components are infilled, and the number of returned rows is checked both
     before and after ``test_db`` is cut down to its first two rows.
     """
     # If we make the data inconsistent, we still get a consistent (if arbitrary)
     # output.
     aggregate = "Emissions|KyotoTotal"
     # Every row gets the same variable and unit, so the data now contains
     # duplicates:
     test_db.data["variable"] = aggregate
     test_db.data["unit"] = "Mt CO2/yr"
     # Drop the "meta" column from larger_df and rebuild it; per the original
     # note that column is not found in test_db.
     larger_df.data.drop("meta", axis=1, inplace=True)
     larger_df = pyam.IamDataFrame(larger_df.data)
     # Use the same time format (year vs. datetime) as test_db.
     larger_df = _adjust_time_style_to_match(larger_df, test_db)
     # NOTE(review): the cruncher is built *before* larger_df is filtered in
     # place below; whether it sees that filtering depends on whether tclass
     # copies its input - confirm.
     tcruncher = self.tclass(larger_df)
     # Keep only the times that also occur in test_db.
     if test_db.time_col == "year":
         larger_df.filter(year=test_db.data[test_db.time_col].values, inplace=True)
     else:
         larger_df.filter(time=test_db.data[test_db.time_col], inplace=True)
     components = ["Emissions|CH4", "Emissions|CO2"]
     returned = tcruncher.infill_components(aggregate, components, test_db)
     # With the duplicated input, as many rows come back as were put in.
     assert len(returned.data) == len(test_db.data)
     # Make the data consistent by keeping only the first two rows:
     test_db.data = test_db.data.iloc[0:2]
     returned = tcruncher.infill_components(aggregate, components, test_db)
     # Two rows come back per input row (presumably one per component).
     assert len(returned.data) == 2 * len(test_db.data)
Exemple #24
0
def test_barplot_stacked_net_line(plot_df):
    """Stacked bar plot with net-value lines, including negative values."""
    fig, ax = plt.subplots(figsize=(8, 8))
    df = pyam.IamDataFrame(plot_df.data.copy())
    # Explicitly add negative contributions for net lines.
    row_head = [
        "test_model1",
        "test_scenario1",
        "World",
        "Primary Energy|foo",
        "EJ/y",
    ]
    extra_points = ((2005, 0.35), (2010, -1.0), (2015, -4.0))
    for offset, (year, value) in enumerate(extra_points):
        df.data.loc[len(df) + offset] = row_head + [year, value]
    selection = df.filter(
        variable="Primary Energy|*",
        model="test_model1",
        scenario="test_scenario1",
        region="World",
    )
    selection.plot.bar(ax=ax, bars="variable", stacked=True)
    plotting.add_net_values_to_bar_plot(ax, color="r")
    return fig
Exemple #25
0
"""
=======================
Plot Data as a Bar Plot
=======================

"""
# sphinx_gallery_thumbnail_number = 3
import matplotlib.pyplot as plt
import pyam

###############################
# Read in some example data and print its first rows

fname = 'msg_input.csv'
# The file is read with ISO-8859-1 encoding.
df = pyam.IamDataFrame(fname, encoding='ISO-8859-1')
print(df.head())

###############################
# We generate a simple stacked bar chart as below

# Select the World-region variables below ``Emissions|CO2``.
# NOTE(review): ``level: 0`` presumably limits the match to direct children of
# ``Emissions|CO2`` - confirm against the pyam filter documentation.
data = df.filter({
    'variable': 'Emissions|CO2|*',
    'level': 0,
    'region': 'World'
})

fig, ax = plt.subplots(figsize=(10, 10))
data.bar_plot(ax=ax, stacked=True)
# Let the axes end at 55% of the figure width (presumably to leave room for
# the legend on the right - confirm).
fig.subplots_adjust(right=0.55)
plt.show()
Exemple #26
0
###############################
# Read in tutorial data and show a summary
# ****************************************
#
# This gallery uses the scenario data from the first-steps tutorial.
#
# If you haven't cloned the **pyam** GitHub repository to your machine,
# you can download the file from
# https://github.com/IAMconsortium/pyam/tree/master/doc/source/tutorials.
#
# Make sure to place the data file in the same folder as this script/notebook.

import matplotlib.pyplot as plt
import pyam

# Load the tutorial data; the file must be in the current working directory.
df = pyam.IamDataFrame("tutorial_data.csv")
# Bare expression: it has no effect in a plain script (presumably it is meant
# to display the summary when run as a notebook or gallery cell).
df

##############################
# First, we generate a simple stacked line chart
# of all components of primary energy supply for one scenario.

model, scenario = "IMAGE 3.0.1", "CD-LINKS_NPi2020_400"

# Keep the World-region primary-energy components of the chosen scenario.
data = df.filter(model=model,
                 scenario=scenario,
                 variable="Primary Energy|*",
                 region="World")

# The scenario name is used as the plot title.
data.plot.stack(title=scenario)
# loc=1 places the legend in the upper right corner.
plt.legend(loc=1)
def test_scenarios(test_data_dir):
    """Load ``rcmip_scen_ssp_world_emissions.csv`` from ``test_data_dir``.

    Returns the file's content as a ``pyam.IamDataFrame``.
    """
    scenario_file = os.path.join(
        test_data_dir, "rcmip_scen_ssp_world_emissions.csv"
    )
    return pyam.IamDataFrame(scenario_file)
# Input files are looked up next to this module.
data_file_scenarios = os.path.join(os.path.dirname(__file__), DATA_FILE_SCENARIOS_NAME)
data_file_regression = os.path.join(
    os.path.dirname(__file__), DATA_FILE_REGRESSION_NAME
)


logger = logging.getLogger(__name__)
# Configure the *root* logger: DEBUG level, with the thread name and level
# prefixed to every message.
logging.getLogger().setLevel(logging.DEBUG)
logFormatter = logging.Formatter("%(threadName)s - %(levelname)s:  %(message)s")
# NOTE: despite the variable name, ``StreamHandler()`` without an argument
# writes to ``sys.stderr``, not ``sys.stdout``.
stdoutHandler = logging.StreamHandler()
stdoutHandler.setFormatter(logFormatter)
logging.getLogger().addHandler(stdoutHandler)


# Load the scenario data at import time.
scenarios = pyam.IamDataFrame(data_file_scenarios)

# Stop immediately if the available MAGICC7 is not the expected version; the
# version actually found is reported as the assertion message.
assert MAGICC7.get_version() == EXPECTED_MAGICC_VERSION, MAGICC7.get_version()

res = run(
    climate_models_cfgs={
        "MAGICC7": [
            {
                "core_climatesensitivity": 3,
                "rf_soxi_dir_wm2": -0.2,
                "out_temperature": 1,
            },
            {
                "core_climatesensitivity": 2,
                "rf_soxi_dir_wm2": -0.1,
                "out_temperature": 1,
Exemple #29
0
"""
==================
Plot Regional Data
==================

"""
import matplotlib.pyplot as plt
import pyam

# Example data file; it must be in the current working directory.
fname = 'msg_input.csv'

df = pyam.IamDataFrame(fname)

# Keep the 2050 values of ``Emissions|CO2``, drop the ``World`` region
# (``keep=False`` inverts the filter) and map the remaining regions with
# ``map_regions('iso')``.
# NOTE(review): 'iso' presumably selects ISO country codes - confirm against
# the pyam ``map_regions`` documentation.
df = (df.filter({
    'variable': 'Emissions|CO2',
    'year': 2050
}).filter({
    'region': 'World'
}, keep=False).map_regions('iso'))

# Print the first rows of the mapped data.
print(df.head())

df.region_plot()

plt.show()
Exemple #30
0
"""
###############################
# Read in tutorial data and show a summary
# ****************************************
#
# This gallery uses the scenario data from the first-steps tutorial.
#
# If you haven't cloned the **pyam** GitHub repository to your machine,
# you can download the file from
# https://github.com/IAMconsortium/pyam/tree/master/doc/source/tutorials.
#
# Make sure to place the data file in the same folder as this script/notebook.

import matplotlib.pyplot as plt
import pyam
df = pyam.IamDataFrame('tutorial_data.csv')
df

###############################
# Show relation of variables
# **************************
#
# In the first example, we show the relation between two variables,
# biomass and fossil energy use.

data = df.filter(region='World')

data.plot.scatter(x='Primary Energy|Biomass',
                  y='Primary Energy|Fossil',
                  color='scenario')
plt.tight_layout()