def test_database_overwriting(tdb_with_data, start_scmrun):
    """Check that saving on top of existing database files merges the data.

    A copy of ``start_scmrun`` tagged with ``ensemble_member = 1`` is saved
    into ``tdb_with_data``. Exactly two ``.nc`` files must then exist under
    the database root, and loading (both filtered to ``cmodel_a`` and
    unfiltered) must return the original and the new timeseries together.
    """
    second_run = start_scmrun.copy()
    second_run["ensemble_member"] = 1

    # The target files already exist, so this save should merge into them
    tdb_with_data.save(second_run)

    nc_pattern = os.path.join(tdb_with_data._root_dir, "**", "*.nc")
    nc_files = glob(nc_pattern, recursive=True)
    assert len(nc_files) == 2

    loaded_cmodel_a = tdb_with_data.load(climate_model="cmodel_a")
    expected_cmodel_a = run_append(
        [
            start_scmrun.filter(climate_model="cmodel_a"),
            second_run.filter(climate_model="cmodel_a"),
        ]
    )
    assert_scmdf_almost_equal(
        loaded_cmodel_a, expected_cmodel_a, check_ts_names=False
    )

    loaded_all = tdb_with_data.load()
    expected_all = run_append([start_scmrun, second_run])
    assert_scmdf_almost_equal(loaded_all, expected_all, check_ts_names=False)
def test_single_timeseries(op, base_single_scmrun, other_single_scmrun):
    """Apply ``op`` between two single-timeseries runs and check the result.

    The method named ``op`` is looked up on ``base_single_scmrun`` and called
    with ``other_single_scmrun``, writing ``"Emissions|CO2|AFOLU"`` into the
    ``variable`` column. Expected values come from ``perform_op``; the
    expected unit depends on the operation.
    """
    out_variable = "Emissions|CO2|AFOLU"

    operation = getattr(base_single_scmrun, op)
    res = operation(other_single_scmrun, op_cols={"variable": out_variable})

    exp_ts = perform_op(base_single_scmrun, other_single_scmrun, op, "variable")
    exp_ts["variable"] = out_variable

    # Operations not listed keep whatever unit ``perform_op`` returned
    unit_by_op = {
        "add": "gigatC / a",
        "subtract": "gigatC / a",
        "multiply": "gigatC ** 2 / a ** 2",
        "divide": "dimensionless",
    }
    if op in unit_by_op:
        exp_ts["unit"] = unit_by_op[op]

    exp = ScmRun(exp_ts)

    assert_scmdf_almost_equal(res, exp, allow_unordered=True, check_ts_names=False)
def test_integration_multiple_ts():
    """Integrate three timeseries with different units and check each one.

    Each result is compared to the expected ``Cumulative <variable>``
    timeseries after converting it to the expected unit, with a relative
    tolerance of ``1e-3``.
    """
    variables = ["Emissions|CO2", "Heat Uptake", "Temperature"]

    start = get_multiple_ts(
        data=np.array([[1, 2, 3], [-1, -2, -3], [0, 5, 10]]).T,
        index=[2020, 2025, 2040],
        variable=variables,
        unit=["Mt CO2 / yr", "W / m^2", "K"],
    )

    res = start.integrate()

    cumulative_names = ["Cumulative {}".format(v) for v in variables]
    exp = get_single_ts(
        data=np.array([[0, 7.5, 45], [0, -7.5, -45], [0, 12.5, 125]]).T,
        index=[2020, 2025, 2040],
        variable=cumulative_names,
        unit=["Mt CO2", "W / m^2 * yr", "K * yr"],
    )

    for cumulative_name in cumulative_names:
        exp_comp = exp.filter(variable=cumulative_name)
        # Compare in the expected unit, whatever unit integrate() reported
        target_unit = exp_comp.get_unique_meta("unit", no_duplicates=True)
        res_comp = res.filter(variable=cumulative_name).convert_unit(target_unit)

        assert_scmdf_almost_equal(
            res_comp,
            exp_comp,
            allow_unordered=True,
            check_ts_names=False,
            rtol=1e-3,
        )
def test_vector_ops_float_int(op, vector):
    """Check arithmetic between a run and ``vector`` using Python operators.

    The expected result is built with ``perform_op_float_int``; the actual
    result uses ``+``, ``-``, ``/`` or ``*`` as selected by ``op``.

    Raises
    ------
    NotImplementedError
        If ``op`` is not one of "add", "subtract", "divide" or "multiply".
    """
    start = get_multiple_ts(
        variable="Emissions|Gas",
        unit=["GtC / yr", "Mt CH4 / yr"],
        scenario=["scen_a", "scen_b"],
    )

    exp = ScmRun(perform_op_float_int(start, vector, op))

    operators = {
        "add": lambda run, other: run + other,
        "subtract": lambda run, other: run - other,
        "divide": lambda run, other: run / other,
        "multiply": lambda run, other: run * other,
    }
    if op not in operators:
        raise NotImplementedError(op)

    res = operators[op](start, vector)

    assert_scmdf_almost_equal(res, exp, allow_unordered=True, check_ts_names=False)
def test_multiple_timeseries(op, base_multiple_scmrun, other_multiple_scmrun):
    """Apply ``op`` between two multi-timeseries runs across ``scenario``.

    The result's ``scenario`` is set to ``"A to B"``. Expected values come
    from ``perform_op``; expected units are derived per timeseries with
    ``convert_to_pint_name`` (squared for "multiply") or are
    ``"dimensionless"`` for "divide".
    """
    out_scenario = "A to B"

    res = getattr(base_multiple_scmrun, op)(
        other_multiple_scmrun, op_cols={"scenario": out_scenario}
    )

    exp_ts = perform_op(base_multiple_scmrun, other_multiple_scmrun, op, "scenario")
    exp_ts["scenario"] = out_scenario

    def _squared_pint_name(unit):
        # Pint name of the unit multiplied by itself
        return convert_to_pint_name("({})**2".format(unit))

    if op == "divide":
        exp_ts["unit"] = "dimensionless"
    elif op == "multiply":
        exp_ts["unit"] = exp_ts["unit"].apply(_squared_pint_name).values
    elif op in ["add", "subtract"]:
        exp_ts["unit"] = exp_ts["unit"].apply(convert_to_pint_name).values

    exp = ScmRun(exp_ts)

    assert_scmdf_almost_equal(res, exp, allow_unordered=True, check_ts_names=False)
def test_nc_to_run_4d(scm_run):
    """Round-trip a run through netCDF with three non-time dimensions.

    The fixture's timeseries are replicated for one base member plus three
    climate models with ten ``run_id`` values each, every copy filled with
    random values. The run is written with dimensions ``scenario``,
    ``climate_model`` and ``run_id``, read back, and compared to the input.

    A locally seeded ``RandomState`` supplies the values so a failure can be
    reproduced, and NumPy's global random state is left untouched.
    """
    rng = np.random.RandomState(0)
    run_cls = scm_run.__class__

    base = scm_run.timeseries()
    val_cols = base.columns.tolist()
    base = base.reset_index()

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, val_cols] = rng.rand(base.shape[0], len(val_cols))

    frames = [base]
    for climate_model in ["abc_m", "def_m", "ghi_m"]:
        for run_id in range(10):
            member = base.copy()
            member["run_id"] = run_id
            member["climate_model"] = climate_model
            member.loc[:, val_cols] = rng.rand(base.shape[0], len(val_cols))
            frames.append(member)

    big_run = run_cls(pd.concat(frames).reset_index(drop=True))

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(
            big_run, out_fname, dimensions=("scenario", "climate_model", "run_id")
        )

        assert exists(out_fname)

        run_read = nc_to_run(run_cls, out_fname)
        assert isinstance(run_read, run_cls)

        assert_scmdf_almost_equal(big_run, run_read, check_ts_names=False)
def test_nc_to_run(scm_run, dimensions):
    """Write ``scm_run`` to netCDF with ``dimensions`` and read it back.

    The file must exist after writing, and the run read back must be of the
    same class as, and almost equal to, the run that was written.
    """
    run_cls = scm_run.__class__

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        run_to_nc(scm_run, nc_path, dimensions=dimensions)
        assert exists(nc_path)

        reloaded = nc_to_run(run_cls, nc_path)
        assert isinstance(reloaded, run_cls)

        assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
def test_database_save_weird_slash(tdb, start_scmrun):
    """Save and reload data whose ``climate_model`` contains a ``/``."""
    weird_name = "cmodel/test"
    start_scmrun["climate_model"] = [weird_name, "other"]

    tdb.save(start_scmrun)

    expected = start_scmrun.filter(climate_model=weird_name)
    # Make sure the filter matched something so the comparison is meaningful
    assert len(expected)

    loaded = tdb.load(climate_model=weird_name)
    assert_scmdf_almost_equal(loaded, expected)
def test_run_to_nc_extra_instead_of_dimension_run_id(scm_run):
    """Round-trip a run where ``run_id`` is written as an extra.

    ``run_id`` is set to ``[1, 2, 1]`` and passed via ``extras`` while only
    ``scenario`` is used as a dimension; the run loaded back must match.
    """
    scm_run["run_id"] = [1, 2, 1]

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        run_to_nc(scm_run, nc_path, dimensions=("scenario",), extras=("run_id",))

        reloaded = ScmRun.from_nc(nc_path)

        assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
def test_nc_methods(scm_run):
    """Round-trip a run using its own ``to_nc`` / ``from_nc`` methods."""
    run_cls = scm_run.__class__

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        scm_run.to_nc(nc_path, dimensions=("scenario",))
        assert exists(nc_path)

        # Called via the instance's class; same as ScmRun.from_nc(out_fname)
        reloaded = run_cls.from_nc(nc_path)
        assert isinstance(reloaded, run_cls)

        assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
def test_nc_to_run_without_dimensions(scm_run):
    """Round-trip a run written with no dimensions and ``run_id`` as an extra.

    The run is first reduced to scenario ``"a_scenario2"`` and given
    ``run_id = [2]`` before being written with ``dimensions=()``.
    """
    single_run = scm_run.filter(scenario="a_scenario2")
    single_run["run_id"] = [2]
    run_cls = single_run.__class__

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        run_to_nc(single_run, nc_path, dimensions=(), extras=("run_id",))
        assert exists(nc_path)

        reloaded = nc_to_run(run_cls, nc_path)
        assert isinstance(reloaded, run_cls)

        assert_scmdf_almost_equal(single_run, reloaded, check_ts_names=False)
def test_groupby(scm_run, g):
    """Group by the columns in ``g`` and map an identity-like function.

    The mapped function asserts that, within each group, every grouped
    column holds a single unique value. Mapping it must return an ``ScmRun``
    equal to the input.
    """

    def assert_single_valued(group):
        # Each column being grouped on must be constant within the group
        group_meta = group.meta[list(g)]
        for column in g:
            assert len(group_meta[column].unique()) == 1

        return group

    res = scm_run.groupby(*g).map(assert_single_valued)

    assert isinstance(res, ScmRun)
    assert_scmdf_almost_equal(res, scm_run, check_ts_names=False)
def test_run_to_nc_loop_tricky_variable_name(scm_run, start_variable):
    """Round-trip a run whose variable has a tricky name.

    Tests that the mapping between variable and units works even with tricky
    variable names that get renamed in various ways before serialising to
    disk. ``"Primary Energy|Coal"`` is replaced by ``start_variable``, which
    is given the unit ``"MJ / yr"`` while all other variables get ``"EJ/yr"``.
    """
    assert "Primary Energy|Coal" in scm_run.get_unique_meta("variable")

    def _rename(variable):
        return variable.replace("Primary Energy|Coal", start_variable)

    def _unit_for(variable):
        if variable == start_variable:
            return "MJ / yr"
        return "EJ/yr"

    scm_run["variable"] = scm_run["variable"].apply(_rename)
    scm_run["unit"] = scm_run["variable"].apply(_unit_for)

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")

        scm_run.to_nc(nc_path, dimensions=("scenario",))
        reloaded = ScmRun.from_nc(nc_path)

        assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
def test_warming_per_gt():
    """Divide a temperature timeseries (K) by cumulative emissions (GtC).

    The result must carry the variable name given in ``op_cols`` and the
    unit ``"kelvin / gigatC"``.
    """
    out_variable = "Warming per Cumulative emissions CO2"

    base = get_single_ts(variable="Surface Temperature", unit="K")
    other = get_single_ts(variable="Cumulative Emissions|CO2", unit="GtC")

    res = base.divide(other, op_cols={"variable": out_variable})

    exp_ts = perform_op(base, other, "divide", ["variable", "unit"])
    exp_ts["variable"] = out_variable
    exp_ts["unit"] = "kelvin / gigatC"
    exp = ScmRun(exp_ts)

    assert_scmdf_almost_equal(res, exp, allow_unordered=True, check_ts_names=False)
def test_scalar_multiply_pint_by_run():
    """Multiply with a pint scalar as the left operand and a run on the right.

    Expected values come from ``perform_pint_op`` with
    ``"multiply_inverse"``; the expected unit is
    ``"megatC * gigatC / a**2"``.
    """
    scalar = 1 * unit_registry("MtC / yr")
    start = get_multiple_ts(
        variable="Emissions|CO2",
        unit="GtC / yr",
        scenario=["scen_a", "scen_b"],
    )

    exp = ScmRun(perform_pint_op(start, scalar, "multiply_inverse"))
    exp["unit"] = "megatC * gigatC / a**2"

    res = scalar * start

    assert_scmdf_almost_equal(res, exp, allow_unordered=True, check_ts_names=False)
def test_nc_to_run_with_extras_id_not_needed_sparsity(scm_run):
    """Round-trip a run whose extra is fully determined by a dimension.

    The ``"a_scenario"`` timeseries are replicated for one base member plus
    three climate models with ten ``run_id`` values each, every copy filled
    with random values. ``paraset_id`` is computed as ``run_id // 3`` and
    written as an extra alongside dimensions ``climate_model`` and
    ``run_id``. The stored variable must have three dimensions, and the run
    read back must match the input.

    A locally seeded ``RandomState`` supplies the values so a failure can be
    reproduced, and NumPy's global random state is left untouched.
    """
    rng = np.random.RandomState(0)
    run_cls = scm_run.__class__

    base = scm_run.filter(scenario="a_scenario").timeseries()
    val_cols = base.columns.tolist()
    base = base.reset_index()

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, val_cols] = rng.rand(base.shape[0], len(val_cols))

    frames = [base]
    for climate_model in ["abc_m", "def_m", "ghi_m"]:
        for run_id in range(10):
            member = base.copy()
            member["run_id"] = run_id
            member["climate_model"] = climate_model
            member.loc[:, val_cols] = rng.rand(base.shape[0], len(val_cols))
            frames.append(member)

    big_df = pd.concat(frames).reset_index(drop=True)
    big_df["paraset_id"] = big_df["run_id"].apply(lambda x: x // 3)
    big_run = run_cls(big_df)

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(
            big_run,
            out_fname,
            dimensions=("climate_model", "run_id"),
            extras=("paraset_id",),
        )

        assert exists(out_fname)

        xr_ds = xr.load_dataset(out_fname)
        # Should save with three dimensions: "time", "climate_model", "run_id"
        # There should be no "_id" as paraset_id is uniquely defined by "run_id"
        assert len(xr_ds["Primary_Energy"].shape) == 3

        run_read = nc_to_run(run_cls, out_fname)
        assert isinstance(run_read, run_cls)

        assert_scmdf_almost_equal(big_run, run_read, check_ts_names=False)
def test_nc_to_run_with_extras_sparsity(scm_run):
    """Round-trip a run whose extra is not determined by the dimensions.

    The fixture's timeseries are replicated for one base member plus three
    climate models with ten ``run_id`` values each, every copy filled with
    random values. ``scenario`` is written as an extra alongside dimensions
    ``climate_model`` and ``run_id``. The stored variable must have four
    dimensions, and the run read back must match the input.

    A locally seeded ``RandomState`` supplies the values so a failure can be
    reproduced, and NumPy's global random state is left untouched.
    """
    rng = np.random.RandomState(0)
    run_cls = scm_run.__class__

    base = scm_run.timeseries()
    val_cols = base.columns.tolist()
    base = base.reset_index()

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, val_cols] = rng.rand(base.shape[0], len(val_cols))

    frames = [base]
    for climate_model in ["abc_m", "def_m", "ghi_m"]:
        for run_id in range(10):
            member = base.copy()
            member["run_id"] = run_id
            member["climate_model"] = climate_model
            member.loc[:, val_cols] = rng.rand(base.shape[0], len(val_cols))
            frames.append(member)

    big_run = run_cls(pd.concat(frames).reset_index(drop=True))

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(
            big_run,
            out_fname,
            dimensions=("climate_model", "run_id"),
            extras=("scenario",),
        )

        assert exists(out_fname)

        xr_ds = xr.load_dataset(out_fname)
        # Should save with four dimensions: "time", "climate_model", "run_id", "_id"
        # the "_id" dimension is required as a short-hand mapping between extras
        # and the data.
        # There is no way to avoid this sparsity.
        assert len(xr_ds["Primary_Energy"].shape) == 4

        run_read = nc_to_run(run_cls, out_fname)
        assert isinstance(run_read, run_cls)

        assert_scmdf_almost_equal(big_run, run_read, check_ts_names=False)
def test_nc_to_run_with_extras_id_needed_and_not_needed(scm_run):
    """Round-trip a run with one extra tied to a dimension and one that is not.

    The ``"a_scenario"`` timeseries are replicated over two models, two
    scenarios and ten ``run_id`` values. ``paraset_id`` is ``run_id // 3``.
    Both ``paraset_id`` and ``model`` are written as extras; the test checks
    how each is stored and that the run read back matches the input.
    """
    run_cls = scm_run.__class__
    template = scm_run.filter(scenario="a_scenario")

    def _member(model, scenario, run_id):
        # Fresh timeseries frame relabelled for one model/scenario/run_id
        member = template.timeseries()
        member["run_id"] = run_id
        member["model"] = model
        member["scenario"] = scenario
        # Drop the original index levels so the new columns take their place
        member.index = member.index.droplevel(["model", "scenario"])
        return member

    members = []
    for model in ("model_a", "model_b"):
        for scenario in ("scenario_a", "scenario_b"):
            for run_id in range(10):
                members.append(_member(model, scenario, run_id))

    full_run = run_cls(pd.concat(members))
    full_run["paraset_id"] = full_run["run_id"].apply(lambda x: x // 3)

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        run_to_nc(
            full_run,
            nc_path,
            dimensions=("climate_model", "run_id", "scenario"),
            extras=("paraset_id", "model"),
        )

        assert exists(nc_path)

        xr_ds = xr.load_dataset(nc_path)

        # Should save with dimensions: "time", "climate_model",
        # "run_id", "scenario" and "_id"
        assert len(xr_ds["Primary_Energy"].shape) == 5

        # model must be saved with id
        assert xr_ds["model"].dims == ("_id",)

        # paraset_id is wholly defined by run_id
        assert xr_ds["paraset_id"].dims == ("run_id",)

        reloaded = nc_to_run(run_cls, nc_path)
        assert isinstance(reloaded, run_cls)

        assert_scmdf_almost_equal(full_run, reloaded, check_ts_names=False)
def test_database_save_weird(tdb, start_scmrun, ch):
    """Save and reload data whose variable name ends with the character ``ch``.

    Loading by the weird variable name must return the matching timeseries,
    and ``tdb.available_data()`` must list the variable with ``ch`` replaced
    by ``"-"`` unless ``ch`` is contained in ``".*"``.
    """
    weird_var_name = "variable" + ch
    start_scmrun["variable"] = [weird_var_name, "other"]

    tdb.save(start_scmrun)

    expected_run = start_scmrun.filter(variable=weird_var_name)
    # Make sure the filter matched something so the comparison is meaningful
    assert len(expected_run)
    assert_scmdf_almost_equal(tdb.load(variable=weird_var_name), expected_run)

    if ch in ".*":
        replace_ch = ch
    else:
        replace_ch = "-"

    expected_rows = [
        ["cmodel_a", "variable" + replace_ch, "region", "scenario"],
        ["cmodel_b", "other", "region", "scenario"],
    ]
    exp = pd.DataFrame(expected_rows, columns=tdb.levels)

    pd.testing.assert_frame_equal(tdb.available_data(), exp)
def test_run_to_nc_dimensions_cover_all_metadata():
    """Round-trip a run written with region, model and scenario as dimensions.

    The run holds two timeseries that differ only in ``model``; apart from
    ``variable`` and ``unit``, every metadata column is used as a dimension.
    """
    meta = {
        "variable": "Surface Temperature",
        "unit": "K",
        "model": ["model_a", "model_b"],
        "scenario": "scen_a",
        "region": "World",
    }
    start = ScmRun(
        np.arange(6).reshape(3, 2),
        index=[2010, 2020, 2030],
        columns=meta,
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")

        start.to_nc(nc_path, dimensions=("region", "model", "scenario"))
        reloaded = ScmRun.from_nc(nc_path)

        assert_scmdf_almost_equal(start, reloaded, check_ts_names=False)
def test_delta_per_delta_time_multiple_ts():
    """Check ``delta_per_delta_time`` on three timeseries with different units.

    The raw result must report per-second units. After converting each
    variable to per-year units, every ``Delta <variable>`` timeseries is
    compared to its expected values with a relative tolerance of ``1e-3``.
    """
    variables = ["Emissions|CO2", "Heat Uptake", "Temperature"]

    start = get_multiple_ts(
        data=np.array([[1, 2, 3], [-1, -2, -3], [0, 5, 10]]).T,
        index=[2020, 2025, 2040],
        variable=variables,
        unit=["Mt CO2", "J / m^2", "K"],
    )

    res = start.delta_per_delta_time()

    expected_raw_units = [
        "CO2 * megametric_ton / second",
        "joule / meter ** 2 / second",
        "kelvin / second",
    ]
    assert sorted(res["unit"]) == sorted(expected_raw_units)

    # Convert one variable at a time, in the same order every time
    per_year_conversions = (
        ("Mt CO2 / yr", "Delta Emissions|CO2"),
        ("J / m^2 / yr", "Delta Heat Uptake"),
        ("K / yr", "Delta Temperature"),
    )
    for target_unit, delta_variable in per_year_conversions:
        res = res.convert_unit(target_unit, variable=delta_variable)

    delta_names = ["Delta {}".format(v) for v in variables]
    exp = get_single_ts(
        data=np.array([[1 / 5, 1 / 15], [-1 / 5, -1 / 15], [5 / 5, 5 / 15]]).T,
        index=[2022.5, (2025 + 2040) / 2],
        variable=delta_names,
        unit=["Mt CO2 / yr", "J / m^2 / yr", "K / yr"],
    )

    for delta_name in delta_names:
        exp_comp = exp.filter(variable=delta_name)
        exp_unit = exp_comp.get_unique_meta("unit", no_duplicates=True)
        res_comp = res.filter(variable=delta_name).convert_unit(exp_unit)

        assert_scmdf_almost_equal(
            res_comp,
            exp_comp,
            allow_unordered=True,
            check_ts_names=False,
            rtol=1e-3,
        )
def test_multiple_ops_cols():
    """Add two runs while overriding both ``variable`` and ``unit``.

    Both values passed in ``op_cols`` must appear unchanged in the result's
    metadata, including the arbitrary unit string ``"nonsense"``.
    """
    overrides = {
        "variable": "Warming plus Cumulative emissions CO2",
        "unit": "nonsense",
    }

    base = get_single_ts(variable="Surface Temperature", unit="K")
    other = get_single_ts(variable="Cumulative Emissions|CO2", unit="GtC")

    res = base.add(other, op_cols=overrides)

    exp_ts = perform_op(base, other, "add", ["variable", "unit"])
    exp_ts["variable"] = "Warming plus Cumulative emissions CO2"
    exp_ts["unit"] = "nonsense"
    exp = ScmRun(exp_ts)

    assert_scmdf_almost_equal(res, exp, allow_unordered=True, check_ts_names=False)
def test_integration(out_var):
    """Integrate a single timeseries and check values, unit and variable name.

    When ``out_var`` is ``None`` the expected variable name is the input
    variable prefixed with ``"Cumulative "``; otherwise it is ``out_var``.
    """
    start = get_single_ts(data=[1, 2, 3], index=[1, 2, 3], unit="GtC / yr")

    res = start.integrate(out_var=out_var)

    exp_var = (
        out_var
        if out_var is not None
        else ("Cumulative " + start["variable"]).values
    )
    exp = get_single_ts(
        data=np.array([0, 1.5, 4]),
        index=[1, 2, 3],
        variable=exp_var,
        unit="gigatC",
    )

    # rtol is needed because the calculation works in seconds, which does not
    # come out quite the same as assuming a regular year
    assert_scmdf_almost_equal(
        res, exp, allow_unordered=True, check_ts_names=False, rtol=1e-3
    )
def test_delta_per_delta_time(out_var):
    """Check ``delta_per_delta_time`` on a single timeseries.

    The result is converted to ``"GtC / yr^2"`` before comparison. When
    ``out_var`` is ``None`` the expected variable name is the input variable
    prefixed with ``"Delta "``; otherwise it is ``out_var``.
    """
    start = get_single_ts(data=[1, 2, 3], index=[1, 2, 3], unit="GtC / yr")

    delta = start.delta_per_delta_time(out_var=out_var)
    res = delta.convert_unit("GtC / yr^2")

    exp_var = (
        out_var
        if out_var is not None
        else ("Delta " + start["variable"]).values
    )
    exp = get_single_ts(
        data=np.array([1, 1]),
        index=[1.5, 2.5],
        variable=exp_var,
        unit="GtC / yr^2",
    )

    # rtol is needed because the calculation works in seconds, which does not
    # come out quite the same as assuming a regular year
    assert_scmdf_almost_equal(
        res, exp, allow_unordered=True, check_ts_names=False, rtol=1e-3
    )
def test_vector_ops_pint(op):
    """Check arithmetic between a run and a pint vector using Python operators.

    Expected values come from ``perform_pint_op``; the expected unit depends
    on ``op``. The actual result uses ``+``, ``-``, ``/`` or ``*``.

    Raises
    ------
    NotImplementedError
        If ``op`` is not one of "add", "subtract", "divide" or "multiply".
    """
    vector = np.arange(3) * unit_registry("MtC / yr")
    start = get_multiple_ts(
        variable="Emissions|CO2",
        unit="GtC / yr",
        scenario=["scen_a", "scen_b"],
    )

    exp = ScmRun(perform_pint_op(start, vector, op))

    # Operations not listed keep whatever unit ``perform_pint_op`` produced
    unit_by_op = {
        "add": "gigatC / a",
        "subtract": "gigatC / a",
        "multiply": "gigatC * megatC / a ** 2",
        "divide": "gigatC / megatC",
    }
    if op in unit_by_op:
        exp["unit"] = unit_by_op[op]

    operators = {
        "add": lambda run, other: run + other,
        "subtract": lambda run, other: run - other,
        "divide": lambda run, other: run / other,
        "multiply": lambda run, other: run * other,
    }
    if op not in operators:
        raise NotImplementedError(op)

    res = operators[op](start, vector)

    assert_scmdf_almost_equal(res, exp, allow_unordered=True, check_ts_names=False)
def test_linear_regression_scmrun():
    """Check ``linear_regression_scmrun`` on four timeseries.

    In the expected output, scenarios ``a``, ``b`` and ``d`` keep their input
    values and scenario ``c`` changes from ``[0, 8, 10]`` to ``[1, 6, 11]``.
    Metadata is the same for input and expected output.
    """

    def _build(rows):
        # One row per scenario; fresh metadata lists are created on each call
        return get_multiple_ts(
            data=np.array(rows).T,
            index=[1969, 1970, 1971],
            variable="Emissions|CO2",
            unit=["Mt CO2 / yr", "Mt CO2 / yr", "Mt CO2 / yr", "GtC / yr"],
            scenario=["a", "b", "c", "d"],
        )

    start = _build([[1, 2, 3], [-1, -2, -3], [0, 8, 10], [0, 5, 10]])

    res = start.linear_regression_scmrun()

    exp = _build([[1, 2, 3], [-1, -2, -3], [1, 6, 11], [0, 5, 10]])

    assert_scmdf_almost_equal(
        res, exp, allow_unordered=True, check_ts_names=False, rtol=1e-3
    )
def test_database_load_data(tdb_with_data, start_scmrun, filter):
    """Load from the database with ``filter`` and compare to the source run.

    ``filter`` is a mapping of keyword arguments passed unchanged to both
    ``tdb_with_data.load`` and ``start_scmrun.filter``.
    """
    loaded_ts = tdb_with_data.load(**filter)
    expected = start_scmrun.filter(**filter)

    assert_scmdf_almost_equal(loaded_ts, expected, check_ts_names=False)