def _to_std(self):
    """Translate model, history and override data to the standard format.

    ``self.model`` and ``self.hist`` are each passed (as copies) through
    ``utils.FormatTranslator.to_std``, indexed by ``utils.df_idx`` and
    sorted. Rows of ``self.hist`` that share an index entry are reduced to
    the last occurrence.

    ``self.overrides`` is set to ``None`` when it is missing or empty.
    Otherwise it is translated the same way and reduced to its ``method``
    column (column names are lower-cased first).

    The frame originally stored in ``self.overrides`` is not modified; the
    temporary ``Unit`` column is added to a private copy.
    """
    _log('Translating to standard format')
    xlator = utils.FormatTranslator()

    self.model = (
        xlator.to_std(df=self.model.copy(), set_metadata=True)
        .set_index(utils.df_idx)
        .sort_index()
    )

    self.hist = (
        xlator.to_std(df=self.hist.copy(), set_metadata=False)
        .set_index(utils.df_idx)
        .sort_index()
    )
    # override with special cases if more are found in history
    self.hist = self.hist[~self.hist.index.duplicated(keep='last')]

    if self.overrides is None or self.overrides.empty:
        self.overrides = None
        return

    # hackery required because unit needed for df_idx; work on a copy so the
    # caller-supplied overrides frame does not silently gain a 'Unit' column
    overrides = self.overrides.copy()
    overrides['Unit'] = 'kt'
    overrides = (
        xlator.to_std(df=overrides, set_metadata=False)
        .set_index(utils.df_idx)
        .sort_index()
    )
    overrides.columns = overrides.columns.str.lower()
    self.overrides = overrides['method']
def test_formatter_to_std():
    """Check ``FormatTranslator.to_std`` against a hand-built expected frame.

    The input holds two IAMC-style rows in 'Mt foo/yr'; the expected frame
    carries sector/region/gas columns, units of 'kt' and year values that
    are 1000 times the input values.
    """
    iamc_variables = [
        'CEDS+|9+ Sectors|Emissions|BC|foo|Unharmonized',
        'Emissions|BC|bar|baz',
    ]
    raw = pd.DataFrame({
        'Variable': iamc_variables,
        'Region': ['a', 'b'],
        '2010': [5.0, 2.0],
        '2020': [-1.0, 3.0],
        'Unit': ['Mt foo/yr', 'Mt foo/yr'],
        'Model': ['foo', 'foo'],
        'Scenario': ['foo', 'foo'],
    })

    # hand the translator its own copy of the input frame
    translator = utils.FormatTranslator(raw.copy())
    observed = translator.to_std().set_index(utils.df_idx)

    std_sectors = [
        'CEDS+|9+ Sectors|foo|Unharmonized',
        'bar|baz',
    ]
    expected = pd.DataFrame({
        'sector': std_sectors,
        'region': ['a', 'b'],
        '2010': [5000.0, 2000.0],
        '2020': [-1000.0, 3000.0],
        'units': ['kt', 'kt'],
        'gas': ['BC', 'BC'],
    }).set_index(utils.df_idx)

    pdt.assert_frame_equal(observed, expected)
def __init__(self, rc, hist, model, overrides, regions):
    """Store the run configuration and input data, then validate them.

    Parameters
    ----------
    rc : aneris.RunControl
        run control; the keys 'prefix', 'suffix', 'config' and
        'add_5regions' are read, 'exogenous' is optional
    hist : pd.DataFrame
        history in IAMC format
    model : pd.DataFrame
        model data in IAMC format
    overrides : pd.DataFrame
        harmonization overrides in IAMC format
    regions : pd.DataFrame
        regional aggregation mapping (ISO -> model regions)

    Raises
    ------
    ValueError
        if `model` contains more than one model name, or if the configured
        harmonization year is not a column of `model` or of `hist`
    """
    # Function-scope import keeps this block self-contained; pandas is
    # already a dependency of the data passed in.
    import pandas as pd

    self.prefix = rc['prefix']
    self.suffix = rc['suffix']
    self.config = rc['config']
    self.add_5regions = rc['add_5regions']
    self.exog_files = rc['exogenous'] if 'exogenous' in rc else []
    self.model = model
    self.hist = hist
    self.overrides = overrides
    self.regions = regions

    if not self.regions['ISO Code'].isin(['World']).any():
        glb = {
            'ISO Code': 'World',
            'Country': 'World',
            'Native Region Code': 'World',
        }
        _log('Manually adding global regional definition: {}'.format(glb))
        # DataFrame.append was removed in pandas 2.0; concat builds the same
        # new frame (one extra row, fresh integer index) and leaves the
        # caller's `regions` frame untouched.
        self.regions = pd.concat(
            [self.regions, pd.DataFrame([glb])],
            ignore_index=True,
        )

    model_names = self.model.Model.unique()
    if len(model_names) > 1:
        raise ValueError('Can not have more than one model to harmonize')
    self.model_name = model_names[0]

    self._xlator = utils.FormatTranslator(prefix=self.prefix,
                                          suffix=self.suffix)
    self._model_dfs = []
    self._metadata_dfs = []
    self._diagnostic_dfs = []
    self.exogenous_trajectories = self._exogenous_trajectories()

    # TODO better type checking?
    # the year is normalized to str before it is looked up in the columns
    self.config['harmonize_year'] = str(self.config['harmonize_year'])
    y = self.config['harmonize_year']
    if y not in model.columns:
        msg = 'Base year {} not found in model data. Existing columns are {}.'
        raise ValueError(msg.format(y, model.columns))
    if y not in hist.columns:
        msg = 'Base year {} not found in hist data. Existing columns are {}.'
        raise ValueError(msg.format(y, hist.columns))
def test_formatter_to_template():
    """Check that ``to_std`` followed by ``to_template`` reproduces the input.

    The expected frame is the input frame with its columns arranged in the
    order returned by ``to_template``.
    """
    df = pd.DataFrame({
        'Variable': [
            'CEDS+|9+ Sectors|Emissions|BC|foo|Unharmonized',
            'CEDS+|9+ Sectors|Emissions|BC|bar|Unharmonized',
        ],
        'Region': ['a', 'b'],
        '2010': [5.0, 2.0],
        '2020': [-1.0, 3.0],
        'Unit': ['Mt BC/yr'] * 2,
        'Model': ['foo'] * 2,
        'Scenario': ['foo'] * 2,
    }).set_index(utils.iamc_idx)
    fmt = utils.FormatTranslator(df,
                                 prefix='CEDS+|9+ Sectors',
                                 suffix='Unharmonized')
    fmt.to_std()
    obs = fmt.to_template()

    # DataFrame.reindex_axis was removed in pandas 1.0; reindex(columns=...)
    # is the equivalent call and also works on older pandas releases.
    exp = df.reindex(columns=obs.columns)
    pdt.assert_frame_equal(obs, exp)