def test_database_overwriting(tdb_with_data, start_scmrun):
    """
    Check that saving into an already-populated database merges the data

    A copy of ``start_scmrun`` with ``ensemble_member`` set to 1 is saved into
    ``tdb_with_data``. Afterwards exactly two ``*.nc`` files must exist under
    the database root, and loading (filtered and unfiltered) must return the
    original run and the copy appended together.
    """
    second_member = start_scmrun.copy()
    second_member["ensemble_member"] = 1

    # The target file will already exist so should merge files
    tdb_with_data.save(second_member)

    nc_pattern = os.path.join(tdb_with_data._root_dir, "**", "*.nc")
    nc_files = glob(nc_pattern, recursive=True)
    assert len(nc_files) == 2

    # Filtered load: only the ``cmodel_a`` timeseries of both runs come back
    loaded_cmodel_a = tdb_with_data.load(climate_model="cmodel_a")
    expected_cmodel_a = run_append(
        [
            start_scmrun.filter(climate_model="cmodel_a"),
            second_member.filter(climate_model="cmodel_a"),
        ]
    )
    assert_scmdf_almost_equal(
        loaded_cmodel_a, expected_cmodel_a, check_ts_names=False
    )

    # Unfiltered load: everything from both runs comes back
    loaded_all = tdb_with_data.load()
    expected_all = run_append([start_scmrun, second_member])
    assert_scmdf_almost_equal(loaded_all, expected_all, check_ts_names=False)
def get_multiple_units_scm_run(scm_run, new_unit, new_unit_alternate):
    """
    Build a run in which one variable carries more than one unit

    The first entry of ``scm_run.get_unique_meta("variable")`` is selected and
    the unit of its first timeseries is replaced; all other timeseries keep
    their units.

    Parameters
    ----------
    scm_run
        Run to start from. It must provide ``get_unique_meta`` and ``filter``.

    new_unit
        Unit to assign to the first timeseries of the selected variable

    new_unit_alternate
        Unit to assign instead when that timeseries already has ``new_unit``

    Returns
    -------
    Result of ``scmdata.run_append`` over the modified timeseries of the
    selected variable followed by the timeseries of all other variables
    """
    target_variable = scm_run.get_unique_meta("variable")[0]

    target_run = scm_run.filter(variable=target_variable)
    units = target_run["unit"].tolist()
    if units:
        # Only the first timeseries changes unit
        units[0] = new_unit_alternate if units[0] == new_unit else new_unit
    target_run["unit"] = units

    remaining_run = scm_run.filter(variable=target_variable, keep=False)

    return scmdata.run_append([target_run, remaining_run])
def diagnose_tcr_ecs_tcre(direction, **kwargs):
    """
    Diagnose ECS, TCR and TCRE for one forcing direction

    More generic handling of positive and negative ECS testing. Borrows
    heavily from pymagicc and carries AGPL3 license.

    Usage: ``diagnose_tcr_ecs_tcre([pos|neg], **kwargs)``

    As a side effect the module-level globals ``abrupt0p5`` and ``onepctcdr``
    are set to the paths of the two CO2 concentration scenario files.

    Parameters
    ----------
    direction
        Passed unchanged to ``diagnose_ecs`` and ``diagnose_tcr_tcre``

    **kwargs
        ``CORE_CLIMATESENSITIVITY`` (required)
            Used as ``core_climatesensitivity`` for both diagnoses
        ``CORE_DELQ2XCO2`` (required)
            Used as ``core_delq2xco2`` for the ECS diagnosis
        ``SCEN_DIR`` (optional)
            Scenario directory, joined onto the base directory. It is only
            used if both scenario files exist in it; otherwise ``'SCEN'`` is
            used.

    Returns
    -------
    dict
        The ECS results updated with the TCR/TCRE results, with
        ``'timeseries'`` replaced by the appended timeseries of both

    Raises
    ------
    KeyError
        ``CORE_CLIMATESENSITIVITY`` or ``CORE_DELQ2XCO2`` is missing
    """
    global abrupt0p5
    global onepctcdr

    abrupt_name = 'ABRUPT0P5XCO2_CO2_CONC.IN'
    onepct_name = '1PCTCDR_CO2_CONC.IN'

    # NOTE(review): '__file__' is a quoted string literal here, so this
    # resolves against the current working directory, not this module's
    # location. Left unchanged because callers may rely on it -- confirm.
    cwd = split(abspath('__file__'))[0]

    # Use the requested scenario directory only if it holds both files
    scen_name = 'SCEN'
    if 'SCEN_DIR' in kwargs:
        candidate_dir = join(cwd, kwargs['SCEN_DIR'])
        if all(
            isfile(join(candidate_dir, name))
            for name in (abrupt_name, onepct_name)
        ):
            scen_name = kwargs['SCEN_DIR']

    scen_dir = join(cwd, scen_name)
    abrupt0p5 = join(scen_dir, abrupt_name)
    onepctcdr = join(scen_dir, onepct_name)

    climate_sensitivity = kwargs['CORE_CLIMATESENSITIVITY']
    ecscfg = {
        'startyear': 1795,
        'endyear': 4321,
        'core_climatesensitivity': climate_sensitivity,
        'core_delq2xco2': kwargs['CORE_DELQ2XCO2'],
        'rf_total_constantafteryr': 2000,
    }
    tcrcfg = {
        'startyear': 1750,
        'endyear': 2570,
        'core_climatesensitivity': climate_sensitivity,
        'rf_total_constantafteryr': 3000,
    }

    # ``tstcfg`` is not defined in this function; it is looked up in the
    # enclosing (module) scope
    ecs_res = diagnose_ecs(direction, **ecscfg, **tstcfg)
    tcr_tcre_res = diagnose_tcr_tcre(direction, **tcrcfg, **tstcfg)

    out = {**ecs_res, **tcr_tcre_res}
    out['timeseries'] = run_append([
        ecs_res['timeseries'],
        tcr_tcre_res['timeseries'],
    ])
    return out
def run_fair(cfgs, output_vars):  # pylint: disable=R0914
    """
    Run FaIR

    The configurations in ``cfgs`` are not modified: the bookkeeping keys
    are removed from a copy before the remainder is passed to FaIR.

    Parameters
    ----------
    cfgs : list[dict]
        List of configurations with which to run FaIR. Each configuration
        must contain the keys ``scenario``, ``model``, ``run_id``,
        ``gmst_factor`` and ``ohu_factor``; all remaining entries are passed
        to ``fair_scm`` as keyword arguments.

    output_vars : list[str]
        Variables to output

    Returns
    -------
    :obj:`ScmRun`
        :obj:`ScmRun` instance with all results.

    Raises
    ------
    KeyError
        One of the required keys is missing from a configuration
    """
    res = []

    for cfg in cfgs:
        # Pop from a shallow copy so the caller's dict keeps its keys and
        # ``cfgs`` can be used again
        fair_kwargs = dict(cfg)
        scenario = fair_kwargs.pop("scenario")
        model = fair_kwargs.pop("model")
        run_id = fair_kwargs.pop("run_id")
        factors = {
            "gmst": fair_kwargs.pop("gmst_factor"),
            "ohu": fair_kwargs.pop("ohu_factor"),
        }

        data, unit, nt = _process_output(
            fair_scm(**fair_kwargs), output_vars, factors
        )

        # Keep variables, values and units in the same order
        items = list(data.items())
        variables = [key for key, _ in items]
        data_scmrun = [values for _, values in items]
        units = [unit[key] for key in variables]

        tempres = ScmRun(
            np.vstack(data_scmrun).T,
            # ``nt`` consecutive index values starting at 1765
            index=np.arange(1765, 1765 + nt),
            columns={
                "scenario": scenario,
                "model": model,
                "region": "World",
                "variable": variables,
                "unit": units,
                "run_id": run_id,
            },
        )
        res.append(tempres)

    return run_append(res)
def run_over_cfgs(self, cfgs, output_variables):
    """
    Run over each configuration parameter set

    For every parameter set: write the parameter file, run the model
    executable, read the results and make an ScmRun with the results.

    Parameters
    ----------
    cfgs : list[dict]
        Parameter sets to run. If a parameter set has an ``"Index"`` entry it
        is used as the ``run_id``, otherwise the position in ``cfgs`` is used.

    output_variables : list[str]
        Variables to read from the results after each run

    Returns
    -------
    Result of ``run_append`` over one ``ScmRun`` per parameter set and
    variable. Variables for which no years are read are skipped.

    Raises
    ------
    subprocess.CalledProcessError
        The model executable exits with a non-zero return code
    """
    # Strip everything but letters, digits, "_" and "-" from the scenario
    # name before using it as a directory name
    scen_dir = os.path.join(
        self.rundir, re.sub("[^a-zA-Z0-9_-]", "", self.scen)
    )
    executable = os.path.join(self.rundir, "scm_vCH4fb")
    pamfile = os.path.join(scen_dir, "inputfiles", "pam_current.scm")
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact
    command = [executable, pamfile]

    runs = []
    for i, pamset in enumerate(cfgs):
        self.pamfilewriter.write_parameterfile(pamset, scen_dir)

        LOGGER.debug("Call, %s", " ".join(command))
        subprocess.check_call(  # nosec # have to use subprocess
            command,
            cwd=self.rundir,
        )

        for variable in output_variables:
            (
                years,
                timeseries,
                unit,
            ) = self.resultsreader.read_variable_timeseries(
                self.scen, variable, self.sfilewriter
            )
            if years.empty:  # pragma: no cover
                continue  # pragma: no cover

            runs.append(
                ScmRun(
                    pd.Series(timeseries, index=years),
                    columns={
                        "climate_model": "CICERO-SCM",
                        "model": self.model,
                        "run_id": pamset.get("Index", i),
                        "scenario": self.scen,
                        "region": ["World"],
                        "variable": [variable],
                        "unit": [unit],
                    },
                )
            )

    return run_append(runs)
def run_fair(cfgs, output_vars):  # pylint: disable=R0914
    """
    Run FaIR

    Each configuration is copied (list values become numpy arrays) and gets
    an ``"output_vars"`` entry before being handed to
    ``_single_fair_iteration``. The number of workers is read from the
    ``FAIR_WORKER_NUMBER`` config value, falling back to the CPU count; a
    process pool is only used when there is more than one worker.

    Parameters
    ----------
    cfgs : list[dict]
        List of configurations with which to run FaIR

    output_vars : list[str]
        Variables to output

    Returns
    -------
    :obj:`ScmRun`
        :obj:`ScmRun` instance with all results.
    """
    updated_config = [
        {
            **{
                key: np.asarray(value) if isinstance(value, list) else value
                for key, value in cfg.items()
            },
            "output_vars": output_vars,
        }
        for cfg in cfgs
    ]

    ncpu = int(config.get("FAIR_WORKER_NUMBER", multiprocessing.cpu_count()))
    LOGGER.info("Running FaIR with %s workers", ncpu)

    parallel_process_kwargs = {
        "func": _single_fair_iteration,
        "configuration": updated_config,
        "config_are_kwargs": False,
    }

    if ncpu <= 1:
        # No pool is passed when there is only one worker
        res = _parallel_process(**parallel_process_kwargs)
    else:
        with ProcessPoolExecutor(ncpu) as pool:
            res = _parallel_process(**parallel_process_kwargs, pool=pool)

    return run_append(res)
def run_ciceroscm_parallel(scenarios, cfgs, output_vars):
    """
    Run CICEROSCM in parallel

    One job is created per (scenario, model) group of
    ``scenarios.timeseries()``; every job receives all of ``cfgs``.

    Parameters
    ----------
    scenarios : IamDataFrame
        Scenariodata with which to run

    cfgs : list[dict]
        List of configurations with which to run CICEROSCM

    output_vars : list[str]
        Variables to output (may require some fiddling with ``out_x``
        variables in ``cfgs`` to get this right)

    Returns
    -------
    :obj:`ScmRun`
        :obj:`ScmRun` instance with all results.
    """
    LOGGER.info("Entered _parallel_ciceroscm")

    grouped_scenarios = scenarios.timeseries().groupby(["scenario", "model"])
    runs = []
    for _, smdf in grouped_scenarios:
        runs.append(
            {
                "cfgs": cfgs,
                "output_variables": output_vars,
                "scenariodata": smdf,
            }
        )

    max_workers = int(config.get("CICEROSCM_WORKER_NUMBER", os.cpu_count()))
    LOGGER.info("Running in parallel with up to %d workers", max_workers)

    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        result = _parallel_process(
            func=_execute_run,
            configuration=runs,
            pool=pool,
            config_are_kwargs=True,
            # no front runs as these defeat the purpose with CICERO-SCM
            # (because it is only parallel on scenarios, not configs)
            front_serial=FRONT_SERIAL,
            front_parallel=FRONT_PARALLEL,
        )

    LOGGER.info("Appending CICERO-SCM results into a single ScmRun")
    # Jobs that returned ``None`` contribute nothing to the output
    completed_runs = [run for run in result if run is not None]
    return scmdata.run_append(completed_runs)
def _fix_pint_incompatible_units(inp):
    """
    Replace units which pint cannot handle with equivalent units

    For every known conversion, timeseries with the odd unit are multiplied
    by the conversion factor and relabelled with the new unit. Conversions
    are applied one after the other to the accumulated result, so each one
    builds on the previous ones.

    Parameters
    ----------
    inp
        Run to fix. It must provide ``get_unique_meta`` and ``filter``.

    Returns
    -------
    ``inp`` itself if none of the odd units is present, otherwise the result
    of ``run_append`` over the untouched and the converted timeseries
    """
    # (unit to replace, factor to multiply the values by, replacement unit)
    conversions = (("10^22 J", 10, "ZJ"),)

    out = inp
    for odd_unit, conv_factor, new_unit in conversions:
        if odd_unit not in out.get_unique_meta("unit"):
            continue

        LOGGER.debug(
            "Converting %s to %s with a conversion factor of %f",
            odd_unit,
            new_unit,
            conv_factor,
        )
        # Filter ``out``, not ``inp``, so earlier conversions are kept
        rest_ts = out.filter(unit=odd_unit, keep=False)
        odd_unit_ts = out.filter(unit=odd_unit)
        odd_unit_ts *= conv_factor
        odd_unit_ts["unit"] = new_unit
        out = run_append([rest_ts, odd_unit_ts])

    return out
def load(self, disable_tqdm=False, **filters):
    """
    Load data from the database

    The filters are cleaned, the backend is asked for the matching files and
    every file is loaded and appended into a single run.

    Parameters
    ----------
    disable_tqdm: bool
        If True, do not show the progress bar

    filters: dict of str : [str, list[str]]
        Filters for the data to load.

        Defaults to loading all values for a level if it isn't specified.

        If a filter is a list then OR logic is applied within the level.
        For example, if we have ``scenario=["ssp119", "ssp126"]`` then
        both the ssp119 and ssp126 scenarios will be loaded.

    Returns
    -------
    :class:`scmdata.ScmRun`
        Loaded data

    Raises
    ------
    ValueError
        If a filter for a level not in :attr:`levels` is specified

        If no data matching ``filters`` is found
    """
    cleaned_filters = self._clean_filters(filters)
    files_to_load = self._backend.get(cleaned_filters)

    progress_bar = tqdman.tqdm(
        files_to_load,
        desc="Loading files",
        leave=False,
        disable=disable_tqdm,
    )

    loaded_runs = []
    for file_key in progress_bar:
        loaded_runs.append(self._backend.load(file_key))

    return run_append(loaded_runs)
def save(self, sr):
    """
    Save a ScmRun to the database

    The dataset should not contain any duplicate metadata for the database
    levels. If a file already exists at the target key, its contents are
    read and ``sr`` is appended to them before writing.

    Parameters
    ----------
    sr : :class:`scmdata.ScmRun`
        Data to save

    Raises
    ------
    ValueError
        If duplicate metadata are present for the requested database levels

    KeyError
        If metadata for the requested database levels are not found

    Returns
    -------
    str
        Key where the data is saved
    """
    key = self.get_key(sr)
    ensure_dir_exists(key)

    # Merge with whatever is already stored under this key
    if os.path.exists(key):
        sr = run_append([ScmRun.from_nc(key), sr])

    # Check for required extra dimensions
    dimensions = self.kwargs.get("dimensions", None)
    if not dimensions:
        # Fall back to the meta columns which have more than one value
        unique_counts = sr.meta.nunique()
        has_multiple_values = unique_counts > 1
        dimensions = unique_counts[has_multiple_values].index.tolist()

    sr.to_nc(key, dimensions=dimensions)

    return key
def calculate_quantiles(
    scmdf,
    quantiles,
    process_over_columns=("run_id", "ensemble_member", "climate_model"),
):
    """
    Calculate quantiles of an :obj:`ScmRun`

    Parameters
    ----------
    scmdf : :obj:`ScmRun`
        :obj:`ScmRun` containing the data from which to calculate the
        quantiles

    quantiles : list of float
        quantiles to calculate (must be in [0, 1])

    process_over_columns : list of str
        Columns to process over. All other columns in ``scmdf.meta`` will be
        included in the output.

    Returns
    -------
    :obj:`ScmRun`
        :obj:`ScmRun` containing the quantiles of interest, processed over
        ``process_over_columns``. Each timeseries carries the quantile it
        belongs to in the ``"quantile"`` column.
    """
    quantile_runs = []
    for quantile in quantiles:
        quantile_values = scmdf.process_over(
            process_over_columns, "quantile", q=quantile
        )
        # Label the result so the quantiles can be told apart once appended
        quantile_values["quantile"] = quantile
        quantile_runs.append(scmdata.ScmRun(quantile_values))

    return scmdata.run_append(quantile_runs)
def run(
    climate_models_cfgs,
    scenarios,
    output_variables=("Surface Temperature",),
    out_config=None,
):  # pylint: disable=W9006
    """
    Run a number of climate models over a number of scenarios

    Parameters
    ----------
    climate_models_cfgs : dict[str: list]
        Dictionary where each key is a model and each value is the configs
        with which to run the model. The configs are passed to the model
        adapter.

    scenarios : :obj:`pyam.IamDataFrame`
        Scenarios to run

    output_variables : list[str]
        Variables to include in the output

    out_config : dict[str: tuple of str]
        Dictionary where each key is a model and each value is a tuple of
        configuration values to include in the output's metadata.

    Returns
    -------
    :obj:`scmdata.ScmRun`
        Model output

    Raises
    ------
    KeyError
        ``out_config`` has keys which are not in ``climate_models_cfgs``

    TypeError
        A value in ``out_config`` is not a :obj:`tuple`

    NotImplementedError
        No adapter is available for one of the requested climate models

    AssertionError
        The models' results do not all have the same meta columns
    """
    _check_out_config(out_config, climate_models_cfgs)

    # These adapters are matched on the upper-cased name, i.e. various
    # capitalisations are allowed. "MAGICC7" must match exactly.
    case_insensitive_adapters = {"FAIR": FAIR, "CICEROSCM": CICEROSCM}

    res = []
    for climate_model, cfgs in progress(
        climate_models_cfgs.items(), desc="Climate models"
    ):
        if climate_model == "MAGICC7":
            adapter = MAGICC7
        else:
            adapter = case_insensitive_adapters.get(climate_model.upper())

        if adapter is None:
            raise NotImplementedError(
                "No adapter available for {}".format(climate_model)
            )
        runner = adapter()

        output_config_cm = None
        if out_config is not None and climate_model in out_config:
            output_config_cm = out_config[climate_model]
            LOGGER.debug(
                "Using output config: %s for %s", output_config_cm, climate_model
            )
        else:
            LOGGER.debug("No output config for %s", climate_model)

        res.append(
            runner.run(
                scenarios,
                cfgs,
                output_variables=output_variables,
                output_config=output_config_cm,
            )
        )

    # Every model must report the same meta columns as the first one
    key_meta = None
    for model_res in res:
        model_meta = set(model_res.meta.columns.tolist())
        if key_meta is None:
            key_meta = model_meta

        climate_model = model_res.get_unique_meta("climate_model")
        if model_meta != key_meta:  # noqa
            raise AssertionError(
                "{} meta: {}, expected meta: {}".format(
                    climate_model, model_meta, key_meta
                )
            )

    if len(res) == 1:
        LOGGER.info("Only one model run, returning its results")
        return res[0]

    LOGGER.info("Appending model results")
    return scmdata.run_append(res)
def run_magicc_parallel(cfgs, output_vars, output_config):
    """
    Run MAGICC in parallel using compact out files

    The MAGICC instances, the shared manager and the process pool are always
    cleaned up, also when setting up the pool or running fails.

    Parameters
    ----------
    cfgs : list[dict]
        List of configurations with which to run MAGICC

    output_vars : list[str]
        Variables to output (may require some fiddling with ``out_x``
        variables in ``cfgs`` to get this right)

    output_config : tuple[str]
        Configuration to include in the output

    Returns
    -------
    :obj:`ScmRun`
        :obj:`ScmRun` instance with all results.
    """
    LOGGER.info("Entered _parallel_magicc_compact_out")
    shared_manager = multiprocessing.Manager()
    shared_dict = shared_manager.dict()

    instances = _MagiccInstances(existing_instances=shared_dict)

    runs = [
        {
            "cfg": {
                **cfg,
                "only": output_vars,
                "output_config": output_config,
            },
            "run_func": _run_func,
            "setup_func": _setup_func,
            "instances": instances,
        }
        for cfg in cfgs
    ]

    # Bound before the ``try`` so that the ``finally`` clause can tell
    # whether the pool was created; otherwise a failure while creating it
    # would be masked by an ``UnboundLocalError``
    pool = None
    try:
        pool = ProcessPoolExecutor(
            max_workers=int(config.get("MAGICC_WORKER_NUMBER", os.cpu_count())),
            initializer=_init_magicc_worker,
            initargs=(shared_dict,),
        )

        res = _parallel_process(
            func=_execute_run,
            configuration=runs,
            pool=pool,
            config_are_kwargs=True,
            front_serial=2,
            front_parallel=2,
        )

        LOGGER.info("Appending results into a single ScmRun")
        # Runs that returned ``None`` contribute nothing to the output
        res = scmdata.run_append([r for r in res if r is not None])
    finally:
        instances.cleanup()
        LOGGER.info("Shutting down parallel pool")
        shared_manager.shutdown()
        if pool is not None:
            pool.shutdown()

    return res
def run_magicc_parallel(cfgs, output_vars, output_config):
    """
    Run MAGICC in parallel using compact out files

    Every entry of ``output_vars`` is additionally converted to its MAGICC
    name, prefixed with ``DAT_``, and passed on as ``out_dynamic_vars``.

    The MAGICC instances, the shared manager and the process pool are always
    cleaned up, also when setting up the pool or running fails.

    Parameters
    ----------
    cfgs : list[dict]
        List of configurations with which to run MAGICC

    output_vars : list[str]
        Variables to output

    output_config : tuple[str]
        Configuration to include in the output

    Returns
    -------
    :obj:`ScmRun`
        :obj:`ScmRun` instance with all results.
    """
    LOGGER.info("Entered _parallel_magicc_compact_out")
    shared_manager = multiprocessing.Manager()
    shared_dict = shared_manager.dict()

    instances = _MagiccInstances(existing_instances=shared_dict)

    magicc_internal_vars = [
        "DAT_{}".format(
            pymagicc.definitions.convert_magicc7_to_openscm_variables(
                v, inverse=True
            )
        )
        for v in output_vars
    ]

    runs = [
        {
            "cfg": {
                **cfg,
                "only": output_vars,
                "out_dynamic_vars": magicc_internal_vars,
                "output_config": output_config,
            },
            "run_func": _run_func,
            "setup_func": _setup_func,
            "instances": instances,
        }
        for cfg in cfgs
    ]

    # Bound before the ``try`` so that the ``finally`` clause can tell
    # whether the pool was created; otherwise a failure while reading the
    # worker number or creating the pool would be masked by an
    # ``UnboundLocalError``
    pool = None
    try:
        max_workers = int(
            config.get("MAGICC_WORKER_NUMBER", multiprocessing.cpu_count())
        )
        LOGGER.info("Running in parallel with up to %d workers", max_workers)
        pool = ProcessPoolExecutor(  # pylint:disable=consider-using-with
            # need to handle shared_manager too
            max_workers=max_workers,
            initializer=_init_magicc_worker,
            initargs=(shared_dict,),
        )

        res = _parallel_process(
            func=_execute_run,
            configuration=runs,
            pool=pool,
            config_are_kwargs=True,
            front_serial=2,
            front_parallel=2,
        )

        LOGGER.info("Appending results into a single ScmRun")
        # Runs that returned ``None`` contribute nothing to the output
        res = scmdata.run_append([r for r in res if r is not None])
    finally:
        instances.cleanup()
        LOGGER.info("Shutting down parallel pool")
        shared_manager.shutdown()
        if pool is not None:
            pool.shutdown()

    return res
# Example script: run MAGICC once per scenario in ``rcps``, then plot the
# World surface temperature relative to its 1850-1900 mean and save the
# figure next to this file.

# Plot appearance
plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = (10, 5)
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.size"] = 12

output_path = os.path.join(os.path.dirname(__file__), './example-plot.png')

# One model run per scenario, appended into a single set of results
results = scmdata.run_append(
    [pymagicc.run(scen) for scen in rcps.groupby("scenario")]
)

world_surface_temperature = results.filter(
    variable="Surface Temperature", region="World"
)
# The reference period includes both 1850 and 1900
temperature_rel_to_1850_1900 = (
    world_surface_temperature.relative_to_ref_period_mean(
        year=range(1850, 1900 + 1)
    )
)

temperature_rel_to_1850_1900.lineplot()
plt.title("Global Mean Temperature Projection")
plt.ylabel("°C over pre-industrial (1850-1900 mean)")
plt.savefig(output_path, dpi=96)