def _filter_for_overlap(df1, df2, cols):
    """
    Keep only the rows of two data containers whose ``cols`` values occur in both.

    Parameters
    ----------
    df1 : object exposing a ``data`` :obj:`pd.DataFrame`
        The first container (presumably a :obj:`pyam.IamDataFrame`).

    df2 : object exposing a ``data`` :obj:`pd.DataFrame`
        The second container (presumably a :obj:`pyam.IamDataFrame`).

    cols : list[str]
        Columns whose combined values must be present in both inputs.

    Returns
    -------
    (:obj:`pyam.IamDataFrame`, :obj:`pyam.IamDataFrame`)
        The filtered data of ``df1`` and ``df2``, in that order.

    Raises
    ------
    ValueError
        If no combination of ``cols`` values is found in both inputs.
    """
    first = df1.data.set_index(cols)
    second = df2.data.set_index(cols)

    # Walk the first index so that its ordering (and any repeats) is retained.
    common = [key for key in first.index if key in second.index]
    if not common:
        raise ValueError("No model/scenario overlap between leader and follower data")

    first = first.loc[common]
    second = second.loc[common]
    return pyam.IamDataFrame(first), pyam.IamDataFrame(second)
def generate_combined_excel():
    """Merge the combined CSV of every pathway into one Excel workbook.

    For each member of ``Pathways`` the file ``<member.value>_combined.csv``
    is read from ``DEF_OUTPUT_PATH``. The frames are concatenated, reduced to
    the IAMC index columns plus ``year`` and ``value``, and written to
    ``GENeSYS-MOD-pathways.xlsx`` (a relative path).
    """
    per_pathway = [
        pyam.IamDataFrame(str(DEF_OUTPUT_PATH / (pathway.value + '_combined.csv')))
        for pathway in Pathways
    ]
    merged = pyam.concat(per_pathway)

    # Re-wrap only the columns of interest, dropping anything else.
    kept_columns = pyam.IAMC_IDX + ['year', 'value']
    trimmed = pyam.IamDataFrame(merged.data[kept_columns])
    trimmed.to_excel('GENeSYS-MOD-pathways.xlsx')
def get_sr15_scenarios(output_file, valid_model_ids):
    """
    Collects world-level data from the IIASA database for the named models and
    saves them to a given location.

    Parameters
    ----------
    output_file : str
        File name and location for data to be saved

    valid_model_ids : list[str] or str
        Names of models that are to be fetched. A single string is treated as
        one model name.

    Raises
    ------
    ValueError
        If ``valid_model_ids`` is empty, so that there is nothing to write.
    """
    # A bare string would otherwise be iterated character by character and
    # each character queried as if it were a model name.
    if isinstance(valid_model_ids, str):
        valid_model_ids = [valid_model_ids]

    conn = pyam.iiasa.Connection("IXSE_SR15")
    variables_to_fetch = ["Emissions*"]

    # Explicit sentinel instead of probing for an unbound local via NameError,
    # which could also hide a genuine NameError raised while appending.
    df = None
    for model in valid_model_ids:
        print("Fetching data for {}".format(model))
        for variable in variables_to_fetch:
            print("Fetching {}".format(variable))
            var_df = conn.query(model=model, variable=variable, region="World")
            if df is None:
                df = pyam.IamDataFrame(var_df)
            else:
                df.append(var_df, inplace=True)

    if df is None:
        raise ValueError("No models were requested, so there is no data to write")

    print("Writing to {}".format(output_file))
    df.to_csv(output_file)
def test_infillallrequiredvariables_check_results_interp_times(
    self, test_db, additional_cols
):
    """Infilling at a time between the known data points returns interpolated values."""
    lead_variables = ["Emissions|HFC|C2F6"]
    wanted_variables = ["Emissions|HFC|C5F12"]

    if additional_cols:
        # Add the extra column(s) with a constant value and re-wrap the data.
        test_db.data[additional_cols] = 0
        test_db = pyam.IamDataFrame(test_db.data)

    # NOTE(review): the original author's remark for the datetime case reads
    # "There is a leap year during 2015, so we subtract 3/5 of a day" - the
    # offset is kept exactly as it was.
    output_times = (
        [2012]
        if test_db.time_col == "year"
        else [pd.Timestamp(year=2012, day=14, month=6, hour=10)]
    )

    output_df = infill_all_required_variables(
        test_db.filter(variable=lead_variables),
        test_db,
        lead_variables,
        wanted_variables,
        check_data_returned=True,
        output_timesteps=output_times,
    )

    # The values should be interpolations between the known values at the start
    expected_values = [(3 * 0.5 + 2 * 1.5) / 5, (3 * 2 + 2 * 3) / 5]
    for label, expected in enumerate(expected_values):
        assert np.isclose(output_df.data["value"][label], expected, atol=1e-5)
def test_convert_units_to_mtco2_equiv_fails_with_month_units(check_aggregate_df):
    """Unit conversion must fail when a unit uses the undefined time unit ``mo``."""
    limited_check_agg = check_aggregate_df.filter(
        variable="Primary Energy*", keep=False
    )

    # Edit one explicitly held frame with a single positional indexer. The
    # chained form ``.data["unit"].iloc[0] = ...`` may write to a temporary
    # copy, leaving the data that is re-wrapped below unchanged.
    data = limited_check_agg.data
    data.iloc[0, data.columns.get_loc("unit")] = "Mt CH4/mo"
    limited_check_agg = pyam.IamDataFrame(data)

    err_msg = "'mo' is not defined in the unit registry"
    with pytest.raises(UndefinedUnitError, match=err_msg):
        convert_units_to_MtCO2_equiv(limited_check_agg)
def test_convert_units_to_mtco2_equiv_fails_with_oom_units(check_aggregate_df):
    """Unit conversion must fail for the unit ``Tt CO2``."""
    limited_check_agg = check_aggregate_df.filter(
        variable="Primary Energy*", keep=False
    )

    # Edit one explicitly held frame with a single positional indexer. The
    # chained form ``.data["unit"].iloc[0] = ...`` may write to a temporary
    # copy, leaving the data that is re-wrapped below unchanged.
    data = limited_check_agg.data
    data.iloc[0, data.columns.get_loc("unit")] = "Tt CO2"
    limited_check_agg = pyam.IamDataFrame(data)

    # The message contains parentheses, so it is escaped before being used as
    # a regular expression by ``pytest.raises``.
    err_msg = re.escape(
        "Cannot convert from Tt CO2 (cleaned is: Tt CO2) to Mt CO2-equiv/yr (cleaned is: Mt CO2/yr)"
    )
    with pytest.raises(ValueError, match=err_msg):
        convert_units_to_MtCO2_equiv(limited_check_agg)
def generate_idataframe_renewable_series(data_wrapper):
    """Concatenate all transformed frames of ``data_wrapper`` into one IamDataFrame.

    ``data_wrapper.transformed_data`` is iterated for its keys and each key is
    looked up again to obtain the frame; the frames are stacked with
    ``pd.concat`` in iteration order.
    """
    logging.info('Executing: generate_idataframe')
    transformed = data_wrapper.transformed_data
    stacked = pd.concat([transformed[key] for key in transformed])
    return pyam.IamDataFrame(stacked)
def test_stackplot_missing_zero_issue_266(plot_stackplot_df):
    """Stack-plot four variables whose values mix positive, zero and negative entries.

    The ``plot_stackplot_df`` fixture is requested but not used in the body.
    """
    years = [2010, 2020, 2030, 2040]
    rows = [
        ['a', 1, 2, 3, 4],
        ['b', 0, 1, 2, 3],
        ['c', -1, 1, -1, -1],
        ['d', 1, 1, 1, -1],
    ]
    wide = pd.DataFrame(rows, columns=['variable'] + years)

    # The remaining IAMC index columns are supplied as constants.
    df = pyam.IamDataFrame(
        wide,
        model='model_a',
        scenario='scen_a',
        region='World',
        unit='some_unit',
    )

    fig, ax = plt.subplots(figsize=(8, 8))
    df.plot.stack(ax=ax)
    return fig
def _adjust_time_style_to_match(in_df, target_df):
    """
    Express the time axis of ``in_df`` in the same style as that of ``target_df``.

    Parameters
    ----------
    in_df : :obj:`pyam.IamDataFrame`
        Data whose time axis may need converting.

    target_df : :obj:`pyam.IamDataFrame`
        Data whose ``time_col`` defines the wanted style.

    Returns
    -------
    :obj:`pyam.IamDataFrame`
        ``in_df`` itself when both share the same ``time_col``; otherwise a new
        object built from the re-labelled timeseries of ``in_df``.
    """
    if in_df.time_col == target_df.time_col:
        return in_df

    wide = in_df.timeseries()
    if target_df.time_col == "time":
        # Reuse the target's own timestamp for every year it contains.
        stamp_for_year = {}
        for stamp in target_df.timeseries().columns:
            stamp_for_year[stamp.year] = stamp

        def _as_datetime(year):
            # Years unknown to the target fall back to the first of January.
            if year in stamp_for_year:
                return stamp_for_year[year]
            return dt.datetime(year, 1, 1)

        wide.columns = wide.columns.map(_as_datetime)
    else:
        wide.columns = wide.columns.map(lambda stamp: stamp.year)
    return pyam.IamDataFrame(wide)
def _combine_data(input_file,
                  generate_series_data: bool = False,
                  generate_load_factors: bool = False):
    """Merge the per-kind CSV outputs of one pathway into ``<input_file>_combined.csv``.

    The yearly file is always read. The load-factor and series files are read
    and appended only when the matching flag is set. All files live in
    ``DEF_OUTPUT_PATH``.

    Parameters
    ----------
    input_file : str
        Common file-name prefix of the CSV files.
    generate_series_data : bool
        Whether ``<input_file>_series.csv`` is appended.
    generate_load_factors : bool
        Whether ``<input_file>_loadfactors.csv`` is appended.
    """
    def _read(suffix):
        # Read one of the pathway's CSV files from the output directory.
        return pyam.IamDataFrame(str(DEF_OUTPUT_PATH / (input_file + suffix)))

    combined = _read('_yearly.csv')
    if generate_load_factors:
        combined = combined.append(_read('_loadfactors.csv'))
    if generate_series_data:
        combined = combined.append(_read('_series.csv'))

    combined.to_csv(DEF_OUTPUT_PATH / (input_file + "_combined.csv"))
def test_line_color_fill_between_interpolate(plot_df):
    """Line plot with ``fill_between`` on data that has extra mid-range points.

    Three rows for 'Primary Energy|Coal' (years 2010, 2012 and 2015, value 3.50)
    are written into a copy of the fixture data before plotting.
    """
    # designed to create the sawtooth behavior at a midpoint with missing data
    df = pyam.IamDataFrame(plot_df.data.copy())
    fig, ax = plt.subplots(figsize=(8, 8))

    # NOTE(review): with a default 0..n-1 row index this label addresses the
    # existing last row, i.e. it is overwritten rather than a row being added -
    # presumably intentional for the stored reference image; confirm.
    newdata = ['test_model1', 'test_scenario1', 'World',
               'Primary Energy|Coal', 'EJ/y', 2010, 3.50]
    df.data.loc[len(df.data) - 1] = newdata

    newdata = ['test_model1', 'test_scenario1', 'World',
               'Primary Energy|Coal', 'EJ/y', 2012, 3.50]
    df.data.loc[len(df.data)] = newdata

    # The length is re-evaluated here, after the previous assignment.
    newdata = ['test_model1', 'test_scenario1', 'World',
               'Primary Energy|Coal', 'EJ/y', 2015, 3.50]
    df.data.loc[len(df.data) + 1] = newdata

    # Use the ``plot`` accessor (line plot by default) like the other plotting
    # tests in this file, instead of the legacy ``line_plot`` alias.
    df.plot(ax=ax, color='model', fill_between=True, legend=True)
    return fig
def test_barplot_stacked_net_line(plot_df):
    """Stacked bar plot with net-value markers, using data with negative entries."""
    fig, ax = plt.subplots(figsize=(8, 8))

    # explicitly add negative contributions for net lines
    df = pyam.IamDataFrame(plot_df.data.copy())
    row_head = ['test_model1', 'test_scenario1', 'World',
                'Primary Energy|foo', 'EJ/y']
    extra_points = [(2005, 0.35), (2010, -1.0), (2015, -4.0)]
    for offset, (year, value) in enumerate(extra_points):
        # The row label is recomputed on every pass, exactly as written before.
        df.data.loc[len(df) + offset] = row_head + [year, value]

    subset = df.filter(
        variable='Primary Energy|*',
        model='test_model1',
        scenario='test_scenario1',
        region='World',
    )
    subset.plot.bar(ax=ax, bars='variable', stacked=True)
    plotting.add_net_values_to_bar_plot(ax, color='r')
    return fig
def test_find_matching_scenarios_dual_region():
    """``find_matching_scenarios`` must reject data that spans more than one region.

    A single row for the region "Country" is added to the data of ``simple_df``
    and the call is expected to raise ``AssertionError``.
    """
    extra_region_row = pd.DataFrame(
        [[_mc, _sa, "Country", _eco2, _gtc, 2010, 2]],
        columns=_msrvu + [simple_df.time_col, "value"],
    )
    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks the
    # rows the same way and keeps the existing index labels.
    multiregion_df = pyam.IamDataFrame(pd.concat([simple_df.data, extra_region_row]))

    with pytest.raises(AssertionError):
        find_matching_scenarios(
            df_to_test,
            multiregion_df,
            variable_follower,
            variable_leaders,
            ["right_scenario", "wrong_scenario", "scen_a", "scen_b"],
            return_all_info=True,
        )
def test_stackplot_negative():
    """Stack plot (with total line) of data mixing positive and negative values."""
    # test that data with both positive & negative values are shown correctly
    rows = [
        ['var1', 1, -1, 0],
        ['var2', 1, 1, -2],
        ['var3', -1, 1, 1],
    ]
    wide = pd.DataFrame(rows, columns=['variable', 2005, 2010, 2015])

    fig, ax = plt.subplots(figsize=(8, 8))
    constant_columns = dict(model='model_a', scenario='scen_a',
                            region='World', unit='foo')
    df = pyam.IamDataFrame(wide, **constant_columns)
    df.plot.stack(ax=ax, total=True)
    return fig
def download_or_load_sr15(filename, valid_model_ids="*"):
    """
    Load SR1.5 data from ``filename``, downloading it first if the file is missing.

    Parameters
    ----------
    filename : str
        File in which to look for the data, and to which it is saved when it
        has to be downloaded.

    valid_model_ids : str
        Model filter. It is passed on to ``get_sr15_scenarios`` when
        downloading and used as the ``model`` filter on the loaded data.

    Returns
    -------
    :obj:`pyam.IamDataFrame`
        The loaded data, filtered by model.
    """
    file_present = os.path.isfile(filename)
    if not file_present:
        get_sr15_scenarios(filename, valid_model_ids)

    loaded = pyam.IamDataFrame(filename)
    return loaded.filter(model=valid_model_ids)
def _construct_consistent_values(self, aggregate_name, components, db_to_generate):
    """
    Sum the component variables and return the result as a new aggregate variable.

    Parameters
    ----------
    aggregate_name : str
        Variable name given to the summed data. It must not already be
        present in ``db_to_generate``.

    components : [str]
        Names of the variables to be summed.

    db_to_generate : :obj:`pyam.IamDataFrame`
        Data from which the components are taken.

    Return
    ------
    :obj:`pyam.IamDataFrame`
        Data summed per model, scenario, region and time point, labelled with
        ``aggregate_name``.

    Raises
    ------
    ValueError
        If none of the components is present, or if the components have more
        than one unit once "-equiv" is ignored.
    """
    known_variables = db_to_generate.variables().values
    assert (
        aggregate_name not in known_variables
    ), "We already have a variable of this name"

    relevant_db = db_to_generate.filter(variable=components)
    units = relevant_db.data["unit"].drop_duplicates().sort_values()

    # Units differing only by "-equiv" count as the same unit.
    stripped_units = units.map(
        lambda unit: unit.replace("-equiv", "")
    ).drop_duplicates()
    unit_count = len(stripped_units)
    if unit_count == 0:
        raise ValueError(
            "Attempting to construct a consistent {} but none of the components "
            "present".format(aggregate_name)
        )
    if unit_count > 1:
        raise ValueError(
            "Too many units found to make a consistent {}".format(aggregate_name)
        )

    group_keys = ["model", "scenario", "region", relevant_db.time_col]
    summed = relevant_db.data.groupby(group_keys).agg("sum").reset_index()

    # Units are sorted in alphabetical order so we choose the first to get -equiv
    summed["unit"] = units.iloc[0]
    summed["variable"] = aggregate_name
    return pyam.IamDataFrame(summed)
def test_stackplot_negative():
    """Stack plot (with total line) of data mixing positive and negative values."""
    # test that data with both positive & negative values are shown correctly
    wide = pd.DataFrame(
        {
            "variable": ["var1", "var2", "var3"],
            2005: [1, 1, -1],
            2010: [-1, 1, 1],
            2015: [0, -2, 1],
        }
    )

    fig, ax = plt.subplots(figsize=(8, 8))
    df = pyam.IamDataFrame(
        wide, model="model_a", scenario="scen_a", region="World", unit="foo"
    )
    df.plot.stack(ax=ax, total=True)
    return fig
def test_stackplot_missing_zero_issue_266(plot_stackplot_df):
    """Stack-plot four variables whose values mix positive, zero and negative entries.

    The ``plot_stackplot_df`` fixture is requested but not used in the body.
    """
    wide = pd.DataFrame(
        {
            "variable": ["a", "b", "c", "d"],
            2010: [1, 0, -1, 1],
            2020: [2, 1, 1, 1],
            2030: [3, 2, -1, 1],
            2040: [4, 3, -1, -1],
        }
    )
    # The remaining IAMC index columns are supplied as constants.
    constant_columns = {
        "model": "model_a",
        "scenario": "scen_a",
        "region": "World",
        "unit": "some_unit",
    }
    df = pyam.IamDataFrame(wide, **constant_columns)

    fig, ax = plt.subplots(figsize=(8, 8))
    df.plot.stack(ax=ax)
    return fig
def get_sr15_scenarios(output_file):
    """
    Fetch the emissions data of every model in the "iamc15" database and save it.

    Parameters
    ----------
    output_file : str
        File name and location for data to be saved

    Raises
    ------
    ValueError
        If no query returned data, so that there is nothing to write.
    """
    conn = pyam.iiasa.Connection("iamc15")
    variables_to_fetch = ["Emissions*"]

    # Explicit sentinel instead of probing for an unbound local via NameError,
    # which could also hide a genuine NameError raised while appending.
    df = None
    for model in conn.models():
        print("Fetching data for {}".format(model))
        for variable in variables_to_fetch:
            print("Fetching {}".format(variable))
            try:
                var_df = conn.query(model=model, variable=variable)
            except Exception as e:
                # Deliberately best-effort: report the failure and carry on
                # with the remaining queries.
                print("Failed for {}".format(model))
                print(str(e))
                continue

            if df is None:
                df = pyam.IamDataFrame(var_df)
            else:
                df.append(var_df, inplace=True)

    if df is None:
        raise ValueError("No data could be fetched, so there is nothing to write")

    print("Writing to {}".format(output_file))
    df.to_csv(output_file)
def test_line_PYAM_COLORS(plot_df):
    """Plot one line per entry of ``plotting.PYAM_COLORS``, each in its own color.

    The filtered fixture data is copied once per color; the copy's ``model`` is
    set to the color name and its values are shifted by the color's position.
    """
    # add a family of lines for each color in plotting.PYAM_COLORS separated by
    # a small offset
    update = {'color': {'model': {}}}
    _df = plot_df.filter(
        model='test_model',
        variable='Primary Energy',
        scenario='test_scenario1',
    ).data.copy()

    dfs = []
    for i, color in enumerate(plotting.PYAM_COLORS):
        df = _df.copy()
        model = color
        df['model'] = model
        df['value'] += i
        # Map every pseudo-model onto the color it is named after.
        update['color']['model'][model] = color
        dfs.append(df)

    df = pyam.IamDataFrame(pd.concat(dfs))
    fig, ax = plt.subplots(figsize=(8, 8))
    with update_run_control(update):
        # Use the ``plot`` accessor (line plot by default) like the other
        # plotting tests in this file, instead of the legacy ``line_plot`` alias.
        df.plot(ax=ax, color='model', legend=True)
    return fig
def test_store_ts(request, caplog, test_mp):
    """Check that the ``store_ts`` task writes time series data to a scenario.

    Two inputs are registered on a ``Computer``: a plain data frame and a
    ``pyam.IamDataFrame``. After running the task, both must be retrievable
    from the target scenario with ``model`` and ``scenario`` filled in.

    The ``request`` and ``caplog`` arguments are not used in the body.
    """
    # Computer and target scenario
    c = Computer()

    # Target scenario
    model_name = __name__
    scenario_name = "test scenario"
    scen = Scenario(test_mp, model_name, scenario_name, version="new")
    scen.commit("Empty scenario")
    c.add("target", scen)

    # Add test data to the Computer: a pd.DataFrame
    input_1 = test_data[0].assign(variable="Foo")
    c.add("input 1", input_1)

    # A pyam.IamDataFrame
    input_2 = test_data[2050].assign(variable="Bar")
    c.add("input 2", pyam.IamDataFrame(input_2))

    # Expected results: same as input, but with the `model` and `scenario` columns
    # filled automatically.
    expected_1 = input_1.assign(model=model_name, scenario=scenario_name)
    expected_2 = input_2.assign(model=model_name, scenario=scenario_name)

    # Task to update the scenario with the data
    c.add("test 1", store_ts, "target", "input 1", "input 2")

    # Scenario starts empty of time series data
    assert 0 == len(scen.timeseries())

    # The computation runs successfully
    c.get("test 1")

    # All rows from both inputs are present
    assert len(input_1) + len(input_2) == len(scen.timeseries())

    # Input is stored exactly
    assert_frame_equal(expected_1, scen.timeseries(variable="Foo"))
    assert_frame_equal(expected_2, scen.timeseries(variable="Bar"))
def test_line_PYAM_COLORS(plot_df):
    """Plot one line per entry of ``plotting.PYAM_COLORS``, each in its own color.

    The filtered fixture data is copied once per color; the copy's ``model`` is
    set to the color name and its values are shifted by the color's position.
    """
    template = plot_df.filter(
        model="test_model",
        variable="Primary Energy",
        scenario="test_scenario1",
    ).data.copy()

    color_of_model = {}
    shifted_frames = []
    for offset, color in enumerate(plotting.PYAM_COLORS):
        frame = template.copy()
        frame["model"] = color
        frame["value"] += offset
        # Each pseudo-model is drawn in the color it is named after.
        color_of_model[color] = color
        shifted_frames.append(frame)

    update = {"color": {"model": color_of_model}}
    df = pyam.IamDataFrame(pd.concat(shifted_frames))

    fig, ax = plt.subplots(figsize=(8, 8))
    with update_run_control(update):
        df.plot(ax=ax, color="model", legend=True)
    return fig
def test_relationship_ignores_incomplete_data(self, larger_df, test_db):
    """Check ``infill_components`` on data that does and does not contain duplicates.

    All rows of ``test_db`` are first relabelled as the aggregate variable, then
    the data is cut down to its first two rows and the infilling is repeated.
    """
    # If we make the data inconsistent, we still get a consistent (if arbitrary)
    # output.
    aggregate = "Emissions|KyotoTotal"
    # This makes the data contain duplicates:
    test_db.data["variable"] = aggregate
    test_db.data["unit"] = "Mt CO2/yr"
    # We remove the extra column from the larger_df as it's not found in test_df
    larger_df.data.drop("meta", axis=1, inplace=True)
    larger_df = pyam.IamDataFrame(larger_df.data)
    larger_df = _adjust_time_style_to_match(larger_df, test_db)
    # The cruncher is built before larger_df is filtered in place below.
    tcruncher = self.tclass(larger_df)
    # Restrict larger_df to the time points present in test_db.
    if test_db.time_col == "year":
        larger_df.filter(year=test_db.data[test_db.time_col].values, inplace=True)
    else:
        larger_df.filter(time=test_db.data[test_db.time_col], inplace=True)
    components = ["Emissions|CH4", "Emissions|CO2"]
    returned = tcruncher.infill_components(aggregate, components, test_db)
    assert len(returned.data) == len(test_db.data)
    # Make the data consistent:
    test_db.data = test_db.data.iloc[0:2]
    returned = tcruncher.infill_components(aggregate, components, test_db)
    # Twice as many rows as inputs are expected here - presumably one per
    # component for each input row; confirm against infill_components.
    assert len(returned.data) == 2 * len(test_db.data)
def test_barplot_stacked_net_line(plot_df):
    """Stacked bar plot with net-value markers, using data with negative entries."""
    fig, ax = plt.subplots(figsize=(8, 8))

    # explicitly add negative contributions for net lines
    df = pyam.IamDataFrame(plot_df.data.copy())
    position = 0
    for year, value in ((2005, 0.35), (2010, -1.0), (2015, -4.0)):
        # The row label is recomputed on every pass, exactly as written before.
        df.data.loc[len(df) + position] = [
            "test_model1",
            "test_scenario1",
            "World",
            "Primary Energy|foo",
            "EJ/y",
            year,
            value,
        ]
        position += 1

    selection = {
        "variable": "Primary Energy|*",
        "model": "test_model1",
        "scenario": "test_scenario1",
        "region": "World",
    }
    df.filter(**selection).plot.bar(ax=ax, bars="variable", stacked=True)
    plotting.add_net_values_to_bar_plot(ax, color="r")
    return fig
"""
=======================
Plot Data as a Bar Plot
=======================

"""
# sphinx_gallery_thumbnail_number = 3

import matplotlib.pyplot as plt
import pyam

###############################
# Read in some example data

fname = 'msg_input.csv'
df = pyam.IamDataFrame(fname, encoding='ISO-8859-1')
print(df.head())

###############################
# We generate a simple stacked bar chart as below.
#
# The filter criteria are passed as keyword arguments and the chart is drawn
# through the ``plot`` accessor.

data = df.filter(variable='Emissions|CO2|*', level=0, region='World')

fig, ax = plt.subplots(figsize=(10, 10))
data.plot.bar(ax=ax, stacked=True)
# Shrink the axes area so that there is room on the right of the figure.
fig.subplots_adjust(right=0.55)
plt.show()
############################### # Read in tutorial data and show a summary # **************************************** # # This gallery uses the scenario data from the first-steps tutorial. # # If you haven't cloned the **pyam** GitHub repository to your machine, # you can download the file from # https://github.com/IAMconsortium/pyam/tree/master/doc/source/tutorials. # # Make sure to place the data file in the same folder as this script/notebook. import matplotlib.pyplot as plt import pyam df = pyam.IamDataFrame("tutorial_data.csv") df ############################## # First, we generate a simple stacked line chart # of all components of primary energy supply for one scenario. model, scenario = "IMAGE 3.0.1", "CD-LINKS_NPi2020_400" data = df.filter(model=model, scenario=scenario, variable="Primary Energy|*", region="World") data.plot.stack(title=scenario) plt.legend(loc=1)
def test_scenarios(test_data_dir):
    """Load ``rcmip_scen_ssp_world_emissions.csv`` from ``test_data_dir`` as an IamDataFrame."""
    scenario_path = os.path.join(test_data_dir, "rcmip_scen_ssp_world_emissions.csv")
    return pyam.IamDataFrame(scenario_path)
data_file_scenarios = os.path.join(os.path.dirname(__file__), DATA_FILE_SCENARIOS_NAME) data_file_regression = os.path.join( os.path.dirname(__file__), DATA_FILE_REGRESSION_NAME ) logger = logging.getLogger(__name__) logging.getLogger().setLevel(logging.DEBUG) logFormatter = logging.Formatter("%(threadName)s - %(levelname)s: %(message)s") stdoutHandler = logging.StreamHandler() stdoutHandler.setFormatter(logFormatter) logging.getLogger().addHandler(stdoutHandler) scenarios = pyam.IamDataFrame(data_file_scenarios) assert MAGICC7.get_version() == EXPECTED_MAGICC_VERSION, MAGICC7.get_version() res = run( climate_models_cfgs={ "MAGICC7": [ { "core_climatesensitivity": 3, "rf_soxi_dir_wm2": -0.2, "out_temperature": 1, }, { "core_climatesensitivity": 2, "rf_soxi_dir_wm2": -0.1, "out_temperature": 1,
"""
==================
Plot Regional Data
==================

"""
import matplotlib.pyplot as plt
import pyam

fname = 'msg_input.csv'

df = pyam.IamDataFrame(fname)

# Keep the CO2 emissions of 2050, drop the "World" region and apply the 'iso'
# region mapping. The filter criteria are passed as keyword arguments.
df = (
    df.filter(variable='Emissions|CO2', year=2050)
    .filter(region='World', keep=False)
    .map_regions('iso')
)

print(df.head())

df.region_plot()
plt.show()
"""
###############################
# Read in tutorial data and show a summary
# ****************************************
#
# This gallery uses the scenario data from the first-steps tutorial.
#
# If you haven't cloned the **pyam** GitHub repository to your machine,
# you can download the file from
# https://github.com/IAMconsortium/pyam/tree/master/doc/source/tutorials.
#
# Make sure to place the data file in the same folder as this script/notebook.

import matplotlib.pyplot as plt
import pyam

df = pyam.IamDataFrame('tutorial_data.csv')
df

###############################
# Show relation of variables
# **************************
#
# In the first example, we show the relation between two variables,
# biomass and fossil energy use.

# Only world-level data is used for the scatter plot.
data = df.filter(region='World')

# One point per data row, colored by scenario.
data.plot.scatter(x='Primary Energy|Biomass', y='Primary Energy|Fossil', color='scenario')
plt.tight_layout()