Beispiel #1
0
def test_database_overwriting(tdb_with_data, start_scmrun):
    """
    Save a second ensemble member into a database that already holds data.

    Exactly two ``*.nc`` files are expected under the database root afterwards,
    and loading must return the original run combined with the new member.
    """
    second_member = start_scmrun.copy()
    second_member["ensemble_member"] = 1

    # The target files already exist, so saving should merge into them
    tdb_with_data.save(second_member)

    nc_pattern = os.path.join(tdb_with_data._root_dir, "**", "*.nc")
    nc_files = glob(nc_pattern, recursive=True)
    assert len(nc_files) == 2

    # Loading a single climate model should give both ensemble members for it
    loaded_cmodel_a = tdb_with_data.load(climate_model="cmodel_a")
    expected_cmodel_a = run_append([
        run.filter(climate_model="cmodel_a")
        for run in (start_scmrun, second_member)
    ])
    assert_scmdf_almost_equal(
        loaded_cmodel_a, expected_cmodel_a, check_ts_names=False
    )

    # Loading without any filter should give everything that was saved
    loaded_all = tdb_with_data.load()
    expected_all = run_append([start_scmrun, second_member])
    assert_scmdf_almost_equal(loaded_all, expected_all, check_ts_names=False)
Beispiel #2
0
def test_single_timeseries(op, base_single_scmrun, other_single_scmrun):
    """
    Apply the operation named by ``op`` between two single-timeseries runs.

    The result is compared against ``perform_op`` output relabelled with the
    target variable and, for the known operations, the expected unit.
    """
    out_variable = "Emissions|CO2|AFOLU"

    operation = getattr(base_single_scmrun, op)
    res = operation(other_single_scmrun, op_cols={"variable": out_variable})

    exp_ts = perform_op(
        base_single_scmrun, other_single_scmrun, op, "variable"
    )
    exp_ts["variable"] = out_variable

    # Expected unit string for each operation; any other op leaves the unit
    # untouched
    unit_by_op = {
        "add": "gigatC / a",
        "subtract": "gigatC / a",
        "multiply": "gigatC ** 2 / a ** 2",
        "divide": "dimensionless",
    }
    if op in unit_by_op:
        exp_ts["unit"] = unit_by_op[op]

    assert_scmdf_almost_equal(
        res, ScmRun(exp_ts), allow_unordered=True, check_ts_names=False
    )
Beispiel #3
0
def test_integration_multiple_ts():
    """
    Integrate a run holding three variables and compare each one separately.

    Each cumulative variable is converted to the unit of the expected run
    before comparison, using a relative tolerance of 1e-3.
    """
    variables = ["Emissions|CO2", "Heat Uptake", "Temperature"]
    cumulative_names = ["Cumulative {}".format(name) for name in variables]
    time_points = [2020, 2025, 2040]

    start = get_multiple_ts(
        data=np.array([[1, 2, 3], [-1, -2, -3], [0, 5, 10]]).T,
        index=time_points,
        variable=variables,
        unit=["Mt CO2 / yr", "W / m^2", "K"],
    )

    res = start.integrate()

    exp = get_single_ts(
        data=np.array([[0, 7.5, 45], [0, -7.5, -45], [0, 12.5, 125]]).T,
        index=[2020, 2025, 2040],
        variable=cumulative_names,
        unit=["Mt CO2", "W / m^2 * yr", "K * yr"],
    )

    for cumulative_name in cumulative_names:
        exp_comp = exp.filter(variable=cumulative_name)
        res_filtered = res.filter(variable=cumulative_name)

        # Bring the result into the expected unit so the values are comparable
        target_unit = exp_comp.get_unique_meta("unit", no_duplicates=True)
        res_comp = res_filtered.convert_unit(target_unit)

        assert_scmdf_almost_equal(
            res_comp,
            exp_comp,
            allow_unordered=True,
            check_ts_names=False,
            rtol=1e-3,
        )
Beispiel #4
0
def test_vector_ops_float_int(op, vector):
    """
    Combine a two-scenario run with ``vector`` using the operator named by ``op``.

    The result is compared against ``perform_op_float_int``. An ``op`` outside
    add/subtract/divide/multiply raises ``NotImplementedError``.
    """
    start = get_multiple_ts(
        variable="Emissions|Gas",
        unit=["GtC / yr", "Mt CH4 / yr"],
        scenario=["scen_a", "scen_b"],
    )

    exp = ScmRun(perform_op_float_int(start, vector, op))

    # Map each operation name onto the Python operator it should exercise
    operators = {
        "add": lambda run, value: run + value,
        "subtract": lambda run, value: run - value,
        "divide": lambda run, value: run / value,
        "multiply": lambda run, value: run * value,
    }
    if op not in operators:
        raise NotImplementedError(op)

    res = operators[op](start, vector)

    assert_scmdf_almost_equal(
        res, exp, allow_unordered=True, check_ts_names=False
    )
Beispiel #5
0
def test_multiple_timeseries(op, base_multiple_scmrun, other_multiple_scmrun):
    """
    Apply the operation named by ``op`` between two multi-timeseries runs.

    The operation is keyed on ``scenario``; the expected units are derived
    from the ``perform_op`` output according to the operation.
    """
    out_scenario = "A to B"

    operation = getattr(base_multiple_scmrun, op)
    res = operation(other_multiple_scmrun, op_cols={"scenario": out_scenario})

    exp_ts = perform_op(
        base_multiple_scmrun, other_multiple_scmrun, op, "scenario"
    )
    exp_ts["scenario"] = out_scenario

    def squared_pint_name(unit):
        # Unit expected after multiplying a quantity by one with the same unit
        return convert_to_pint_name("({})**2".format(unit))

    if op in ("add", "subtract"):
        exp_ts["unit"] = exp_ts["unit"].apply(convert_to_pint_name).values
    elif op == "multiply":
        exp_ts["unit"] = exp_ts["unit"].apply(squared_pint_name).values
    elif op == "divide":
        exp_ts["unit"] = "dimensionless"

    assert_scmdf_almost_equal(
        res, ScmRun(exp_ts), allow_unordered=True, check_ts_names=False
    )
Beispiel #6
0
def test_nc_to_run_4d(scm_run):
    """
    Round-trip a run through ``run_to_nc`` / ``nc_to_run`` with three dimensions.

    The input is expanded to one base member plus ten run ids for each of
    three climate models, all filled with random values.
    """
    base = scm_run.timeseries()
    value_cols = base.columns.tolist()
    base = base.reset_index()
    random_shape = (base.shape[0], len(value_cols))

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, value_cols] = np.random.rand(*random_shape)

    def random_member(climate_model, run_id):
        # Copy of the base frame with its own ids and fresh random values
        member = base.copy()
        member["run_id"] = run_id
        member["climate_model"] = climate_model
        member.loc[:, value_cols] = np.random.rand(*random_shape)
        return member

    members = [base] + [
        random_member(climate_model, run_id)
        for climate_model in ["abc_m", "def_m", "ghi_m"]
        for run_id in range(10)
    ]

    scm_run = scm_run.__class__(pd.concat(members).reset_index(drop=True))
    run_cls = scm_run.__class__

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(
            scm_run,
            out_fname,
            dimensions=("scenario", "climate_model", "run_id"),
        )

        assert exists(out_fname)

        run_read = nc_to_run(run_cls, out_fname)
        assert isinstance(run_read, run_cls)

        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
Beispiel #7
0
def test_nc_to_run(scm_run, dimensions):
    """Write ``scm_run`` with ``run_to_nc`` and check ``nc_to_run`` reads it back."""
    run_cls = scm_run.__class__

    with tempfile.TemporaryDirectory() as tempdir:
        nc_path = join(tempdir, "out.nc")
        run_to_nc(scm_run, nc_path, dimensions=dimensions)

        # The file must have been created before it is read back
        assert exists(nc_path)

        reloaded = nc_to_run(run_cls, nc_path)
        assert isinstance(reloaded, run_cls)

        assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
Beispiel #8
0
def test_database_save_weird_slash(tdb, start_scmrun):
    """Save and reload a run whose climate model name contains a ``/``."""
    weird_name = "cmodel/test"
    start_scmrun["climate_model"] = [weird_name, "other"]

    tdb.save(start_scmrun)

    expected = start_scmrun.filter(climate_model=weird_name)
    # Guard against the comparison below passing trivially on an empty filter
    assert len(expected)

    loaded = tdb.load(climate_model=weird_name)
    reference = start_scmrun.filter(climate_model=weird_name)
    assert_scmdf_almost_equal(loaded, reference)
Beispiel #9
0
def test_run_to_nc_extra_instead_of_dimension_run_id(scm_run):
    """Round-trip a run where ``run_id`` is written as an extra, not a dimension."""
    with tempfile.TemporaryDirectory() as tempdir:
        scm_run["run_id"] = [1, 2, 1]
        nc_path = join(tempdir, "out.nc")

        run_to_nc(
            scm_run,
            nc_path,
            dimensions=("scenario", ),
            extras=("run_id", ),
        )
        reloaded = ScmRun.from_nc(nc_path)

    assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
Beispiel #10
0
def test_nc_methods(scm_run):
    """Round-trip a run through its own ``to_nc`` / ``from_nc`` methods."""
    run_cls = scm_run.__class__

    with tempfile.TemporaryDirectory() as tempdir:
        nc_path = join(tempdir, "out.nc")
        scm_run.to_nc(nc_path, dimensions=("scenario", ))

        assert exists(nc_path)

        # Going via the instance's class; same as ScmRun.from_nc(nc_path)
        reloaded = run_cls.from_nc(nc_path)

        assert isinstance(reloaded, run_cls)
        assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
Beispiel #11
0
def test_nc_to_run_without_dimensions(scm_run):
    """Round-trip one scenario with no dimensions and ``run_id`` as an extra."""
    with tempfile.TemporaryDirectory() as tempdir:
        nc_path = join(tempdir, "out.nc")

        # Narrow to one scenario and give it a single run id
        scm_run = scm_run.filter(scenario="a_scenario2")
        scm_run["run_id"] = [2]
        run_cls = scm_run.__class__

        run_to_nc(scm_run, nc_path, dimensions=(), extras=("run_id", ))

        assert exists(nc_path)

        reloaded = nc_to_run(run_cls, nc_path)
        assert isinstance(reloaded, run_cls)

        assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
Beispiel #12
0
def test_groupby(scm_run, g):
    """
    Group by the columns in ``g`` and map an identity function over the groups.

    Inside each group every grouping column must hold one unique value, and
    the mapped result must equal the original run.
    """
    group_cols = list(g)

    def check_group(group):
        # Within a single group, each grouping column has exactly one value
        group_meta = group.meta[group_cols]
        assert all(len(group_meta[col].unique()) == 1 for col in g)
        return group

    res = scm_run.groupby(*g).map(check_group)

    assert isinstance(res, ScmRun)
    assert_scmdf_almost_equal(res, scm_run, check_ts_names=False)
Beispiel #13
0
def test_run_to_nc_loop_tricky_variable_name(scm_run, start_variable):
    """
    Round-trip a run whose variable name may be renamed during serialisation.

    Checks that the variable-to-unit mapping survives ``to_nc`` / ``from_nc``
    when "Primary Energy|Coal" is replaced by ``start_variable`` and that
    variable is given its own unit.
    """
    coal = "Primary Energy|Coal"
    assert coal in scm_run.get_unique_meta("variable")

    def rename_coal(name):
        return name.replace(coal, start_variable)

    def unit_for(name):
        # Only the renamed variable gets the distinct unit
        return "MJ / yr" if name == start_variable else "EJ/yr"

    scm_run["variable"] = scm_run["variable"].apply(rename_coal)
    scm_run["unit"] = scm_run["variable"].apply(unit_for)

    with tempfile.TemporaryDirectory() as tempdir:
        nc_path = join(tempdir, "out.nc")

        scm_run.to_nc(nc_path, dimensions=("scenario", ))
        reloaded = ScmRun.from_nc(nc_path)

    assert_scmdf_almost_equal(scm_run, reloaded, check_ts_names=False)
Beispiel #14
0
def test_warming_per_gt():
    """Divide a temperature run (K) by a cumulative emissions run (GtC)."""
    out_variable = "Warming per Cumulative emissions CO2"

    base = get_single_ts(variable="Surface Temperature", unit="K")
    other = get_single_ts(variable="Cumulative Emissions|CO2", unit="GtC")

    res = base.divide(other, op_cols={"variable": out_variable})

    # Expected values come from the reference helper; metadata is set by hand
    exp_ts = perform_op(base, other, "divide", ["variable", "unit"])
    exp_ts["variable"] = out_variable
    exp_ts["unit"] = "kelvin / gigatC"

    assert_scmdf_almost_equal(
        res, ScmRun(exp_ts), allow_unordered=True, check_ts_names=False
    )
Beispiel #15
0
def test_scalar_multiply_pint_by_run():
    """Multiply with the pint scalar on the left-hand side of a run."""
    scalar = 1 * unit_registry("MtC / yr")
    start = get_multiple_ts(
        variable="Emissions|CO2",
        unit="GtC / yr",
        scenario=["scen_a", "scen_b"],
    )

    exp = ScmRun(perform_pint_op(start, scalar, "multiply_inverse"))
    exp["unit"] = "megatC * gigatC / a**2"

    # Scalar on the left, run on the right
    res = scalar * start

    assert_scmdf_almost_equal(
        res, exp, allow_unordered=True, check_ts_names=False
    )
Beispiel #16
0
def test_nc_to_run_with_extras_id_not_needed_sparsity(scm_run):
    """
    Round-trip a run whose extra (``paraset_id``) is derived from ``run_id``.

    The written "Primary_Energy" variable is expected to have three
    dimensions, i.e. no additional "_id" dimension.
    """
    base = scm_run.filter(scenario="a_scenario").timeseries()
    value_cols = base.columns.tolist()
    base = base.reset_index()
    random_shape = (base.shape[0], len(value_cols))

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, value_cols] = np.random.rand(*random_shape)

    def random_member(climate_model, run_id):
        # Copy of the base frame with its own ids and fresh random values
        member = base.copy()
        member["run_id"] = run_id
        member["climate_model"] = climate_model
        member.loc[:, value_cols] = np.random.rand(*random_shape)
        return member

    members = [base] + [
        random_member(climate_model, run_id)
        for climate_model in ["abc_m", "def_m", "ghi_m"]
        for run_id in range(10)
    ]

    combined = pd.concat(members).reset_index(drop=True)
    # paraset_id is a pure function of run_id
    combined["paraset_id"] = combined["run_id"].apply(lambda rid: rid // 3)
    scm_run = scm_run.__class__(combined)
    run_cls = scm_run.__class__

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")

        run_to_nc(
            scm_run,
            out_fname,
            dimensions=("climate_model", "run_id"),
            extras=("paraset_id", ),
        )

        assert exists(out_fname)

        # Should save with three dimensions: "time", "climate_model", "run_id"
        # No "_id" dimension should be present because paraset_id is uniquely
        # defined by "run_id"
        xr_ds = xr.load_dataset(out_fname)
        assert len(xr_ds["Primary_Energy"].shape) == 3

        run_read = nc_to_run(run_cls, out_fname)
        assert isinstance(run_read, run_cls)

        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
Beispiel #17
0
def test_nc_to_run_with_extras_sparsity(scm_run):
    """
    Round-trip a run where ``scenario`` is written as an extra.

    The written "Primary_Energy" variable is expected to have four
    dimensions, including the "_id" dimension that links extras to the data.
    """
    base = scm_run.timeseries()
    value_cols = base.columns.tolist()
    base = base.reset_index()
    random_shape = (base.shape[0], len(value_cols))

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, value_cols] = np.random.rand(*random_shape)

    def random_member(climate_model, run_id):
        # Copy of the base frame with its own ids and fresh random values
        member = base.copy()
        member["run_id"] = run_id
        member["climate_model"] = climate_model
        member.loc[:, value_cols] = np.random.rand(*random_shape)
        return member

    members = [base] + [
        random_member(climate_model, run_id)
        for climate_model in ["abc_m", "def_m", "ghi_m"]
        for run_id in range(10)
    ]

    scm_run = scm_run.__class__(pd.concat(members).reset_index(drop=True))
    run_cls = scm_run.__class__

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")

        run_to_nc(
            scm_run,
            out_fname,
            dimensions=("climate_model", "run_id"),
            extras=("scenario", ),
        )

        assert exists(out_fname)

        # Should save with four dimensions: "time", "climate_model", "run_id"
        # and "_id". The "_id" dimension is required as a short-hand mapping
        # between the extras and the data; this sparsity cannot be avoided.
        xr_ds = xr.load_dataset(out_fname)
        assert len(xr_ds["Primary_Energy"].shape) == 4

        run_read = nc_to_run(run_cls, out_fname)
        assert isinstance(run_read, run_cls)

        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
Beispiel #18
0
def test_nc_to_run_with_extras_id_needed_and_not_needed(scm_run):
    """
    Round-trip a run with two extras that are stored differently.

    ``model`` is expected on the "_id" dimension, while ``paraset_id`` is
    expected on the "run_id" dimension because it is derived from ``run_id``.
    """
    scmrun = scm_run.filter(scenario="a_scenario")

    def build_member(model, scenario, run_id):
        # Fresh timeseries relabelled with this model / scenario / run id
        member = scmrun.timeseries()
        member["run_id"] = run_id
        member["model"] = model
        member["scenario"] = scenario
        member.index = member.index.droplevel(["model", "scenario"])
        return member

    members = [
        build_member(model, scenario, run_id)
        for model in ("model_a", "model_b")
        for scenario in ("scenario_a", "scenario_b")
        for run_id in range(10)
    ]

    scm_run = scm_run.__class__(pd.concat(members))
    scm_run["paraset_id"] = scm_run["run_id"].apply(lambda rid: rid // 3)
    run_cls = scm_run.__class__

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")

        run_to_nc(
            scm_run,
            out_fname,
            dimensions=("climate_model", "run_id", "scenario"),
            extras=("paraset_id", "model"),
        )

        assert exists(out_fname)

        xr_ds = xr.load_dataset(out_fname)

        # Should save with dimensions: "time", "climate_model",
        # "run_id", "scenario" and "_id"
        assert len(xr_ds["Primary_Energy"].shape) == 5

        # model must be saved with id
        assert xr_ds["model"].dims == ("_id", )
        # paraset_id is wholly defined by run_id
        assert xr_ds["paraset_id"].dims == ("run_id", )

        run_read = nc_to_run(run_cls, out_fname)
        assert isinstance(run_read, run_cls)

        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
Beispiel #19
0
def test_database_save_weird(tdb, start_scmrun, ch):
    """
    Save and reload a run whose variable name ends with the character ``ch``.

    Also checks ``tdb.available_data()``, where ``ch`` is expected to appear
    as "-" unless it is found in ".*".
    """
    weird_var_name = "variable" + ch
    start_scmrun["variable"] = [weird_var_name, "other"]
    tdb.save(start_scmrun)

    # Guard against the comparison below passing trivially on an empty filter
    assert len(start_scmrun.filter(variable=weird_var_name))

    loaded = tdb.load(variable=weird_var_name)
    reference = start_scmrun.filter(variable=weird_var_name)
    assert_scmdf_almost_equal(loaded, reference)

    # "." and "*" are expected unchanged in the listing; anything else as "-"
    replace_ch = ch if ch in ".*" else "-"
    expected_rows = [
        ["cmodel_a", "variable" + replace_ch, "region", "scenario"],
        ["cmodel_b", "other", "region", "scenario"],
    ]
    exp = pd.DataFrame(expected_rows, columns=tdb.levels)

    pd.testing.assert_frame_equal(tdb.available_data(), exp)
Beispiel #20
0
def test_run_to_nc_dimensions_cover_all_metadata():
    """Round-trip a run written with region, model and scenario as dimensions."""
    meta = {
        "variable": "Surface Temperature",
        "unit": "K",
        "model": ["model_a", "model_b"],
        "scenario": "scen_a",
        "region": "World",
    }
    values = np.arange(6).reshape(3, 2)
    start = ScmRun(values, index=[2010, 2020, 2030], columns=meta)

    with tempfile.TemporaryDirectory() as tempdir:
        nc_path = join(tempdir, "out.nc")

        start.to_nc(nc_path, dimensions=("region", "model", "scenario"))
        reloaded = ScmRun.from_nc(nc_path)

    assert_scmdf_almost_equal(start, reloaded, check_ts_names=False)
Beispiel #21
0
def test_delta_per_delta_time_multiple_ts():
    """
    Run ``delta_per_delta_time`` on three variables and check each one.

    The raw result units are checked first, then every variable is converted
    to a per-year unit and compared with a relative tolerance of 1e-3.
    """
    variables = ["Emissions|CO2", "Heat Uptake", "Temperature"]
    delta_names = ["Delta {}".format(name) for name in variables]

    start = get_multiple_ts(
        data=np.array([[1, 2, 3], [-1, -2, -3], [0, 5, 10]]).T,
        index=[2020, 2025, 2040],
        variable=variables,
        unit=["Mt CO2", "J / m^2", "K"],
    )

    res = start.delta_per_delta_time()

    raw_units = [
        "CO2 * megametric_ton / second",
        "joule / meter ** 2 / second",
        "kelvin / second",
    ]
    assert sorted(res["unit"]) == sorted(raw_units)

    # Convert one variable at a time from per-second to per-year units
    per_year_units = (
        ("Delta Emissions|CO2", "Mt CO2 / yr"),
        ("Delta Heat Uptake", "J / m^2 / yr"),
        ("Delta Temperature", "K / yr"),
    )
    for delta_variable, per_year_unit in per_year_units:
        res = res.convert_unit(per_year_unit, variable=delta_variable)

    exp = get_single_ts(
        data=np.array([[1 / 5, 1 / 15], [-1 / 5, -1 / 15], [5 / 5, 5 / 15]]).T,
        index=[2022.5, (2025 + 2040) / 2],
        variable=delta_names,
        unit=["Mt CO2 / yr", "J / m^2 / yr", "K / yr"],
    )

    for delta_name in delta_names:
        exp_comp = exp.filter(variable=delta_name)
        res_filtered = res.filter(variable=delta_name)

        target_unit = exp_comp.get_unique_meta("unit", no_duplicates=True)
        res_comp = res_filtered.convert_unit(target_unit)

        assert_scmdf_almost_equal(
            res_comp,
            exp_comp,
            allow_unordered=True,
            check_ts_names=False,
            rtol=1e-3,
        )
Beispiel #22
0
def test_multiple_ops_cols():
    """Add two runs while overriding both ``variable`` and ``unit`` via ``op_cols``."""
    out_variable = "Warming plus Cumulative emissions CO2"
    out_unit = "nonsense"

    base = get_single_ts(variable="Surface Temperature", unit="K")
    other = get_single_ts(variable="Cumulative Emissions|CO2", unit="GtC")

    res = base.add(
        other, op_cols={"variable": out_variable, "unit": out_unit}
    )

    # Expected values come from the reference helper; metadata is set by hand
    exp_ts = perform_op(base, other, "add", ["variable", "unit"])
    exp_ts["variable"] = out_variable
    exp_ts["unit"] = out_unit

    assert_scmdf_almost_equal(
        res, ScmRun(exp_ts), allow_unordered=True, check_ts_names=False
    )
Beispiel #23
0
def test_integration(out_var):
    """
    Integrate a single timeseries, optionally naming the output variable.

    With ``out_var`` of ``None`` the expected variable is the input variable
    prefixed with "Cumulative ".
    """
    time_points = [1, 2, 3]
    start = get_single_ts(data=[1, 2, 3], index=time_points, unit="GtC / yr")

    res = start.integrate(out_var=out_var)

    exp_var = (
        ("Cumulative " + start["variable"]).values
        if out_var is None
        else out_var
    )

    exp = get_single_ts(
        data=np.array([0, 1.5, 4]),
        index=[1, 2, 3],
        variable=exp_var,
        unit="gigatC",
    )

    # rtol is needed because the calculation uses seconds, which doesn't work
    # out quite the same as assuming a regular year
    assert_scmdf_almost_equal(
        res,
        exp,
        allow_unordered=True,
        check_ts_names=False,
        rtol=1e-3,
    )
Beispiel #24
0
def test_delta_per_delta_time(out_var):
    """
    Run ``delta_per_delta_time`` on one timeseries, optionally naming the output.

    With ``out_var`` of ``None`` the expected variable is the input variable
    prefixed with "Delta ".
    """
    target_unit = "GtC / yr^2"
    start = get_single_ts(data=[1, 2, 3], index=[1, 2, 3], unit="GtC / yr")

    deltas = start.delta_per_delta_time(out_var=out_var)
    res = deltas.convert_unit(target_unit)

    exp_var = (
        ("Delta " + start["variable"]).values
        if out_var is None
        else out_var
    )

    exp = get_single_ts(
        data=np.array([1, 1]),
        index=[1.5, 2.5],
        variable=exp_var,
        unit=target_unit,
    )

    # rtol is needed because the calculation uses seconds, which doesn't work
    # out quite the same as assuming a regular year
    assert_scmdf_almost_equal(
        res,
        exp,
        allow_unordered=True,
        check_ts_names=False,
        rtol=1e-3,
    )
Beispiel #25
0
def test_vector_ops_pint(op):
    """
    Combine a run with a pint vector using the operator named by ``op``.

    The result is compared against ``perform_pint_op`` with the expected unit
    set per operation. An ``op`` outside add/subtract/divide/multiply raises
    ``NotImplementedError``.
    """
    vector = np.arange(3) * unit_registry("MtC / yr")
    start = get_multiple_ts(
        variable="Emissions|CO2",
        unit="GtC / yr",
        scenario=["scen_a", "scen_b"],
    )

    exp = ScmRun(perform_pint_op(start, vector, op))

    # Expected unit string for each supported operation
    expected_units = {
        "add": "gigatC / a",
        "subtract": "gigatC / a",
        "multiply": "gigatC * megatC / a ** 2",
        "divide": "gigatC / megatC",
    }
    if op in expected_units:
        exp["unit"] = expected_units[op]

    # Map each operation name onto the Python operator it should exercise
    operators = {
        "add": lambda run, value: run + value,
        "subtract": lambda run, value: run - value,
        "divide": lambda run, value: run / value,
        "multiply": lambda run, value: run * value,
    }
    if op not in operators:
        raise NotImplementedError(op)

    res = operators[op](start, vector)

    assert_scmdf_almost_equal(
        res, exp, allow_unordered=True, check_ts_names=False
    )
Beispiel #26
0
def test_linear_regression_scmrun():
    """
    Compare ``linear_regression_scmrun`` output against hand-written values.

    Input and expected runs share the same time points, variable, units and
    scenarios; only the values of scenario "c" differ between them.
    """

    def build_run(rows):
        # Input and expected runs carry identical metadata; only data varies
        return get_multiple_ts(
            data=np.array(rows).T,
            index=[1969, 1970, 1971],
            variable="Emissions|CO2",
            unit=["Mt CO2 / yr", "Mt CO2 / yr", "Mt CO2 / yr", "GtC / yr"],
            scenario=["a", "b", "c", "d"],
        )

    start = build_run([[1, 2, 3], [-1, -2, -3], [0, 8, 10], [0, 5, 10]])

    res = start.linear_regression_scmrun()

    exp = build_run([[1, 2, 3], [-1, -2, -3], [1, 6, 11], [0, 5, 10]])

    assert_scmdf_almost_equal(
        res,
        exp,
        allow_unordered=True,
        check_ts_names=False,
        rtol=1e-3,
    )
Beispiel #27
0
def test_database_load_data(tdb_with_data, start_scmrun, filter):
    """Load with the ``filter`` keywords and compare to the same filter on the source run."""
    loaded = tdb_with_data.load(**filter)
    expected = start_scmrun.filter(**filter)

    assert_scmdf_almost_equal(loaded, expected, check_ts_names=False)