Example #1
0
def test_no_netcdf(scm_run):
    """Both netCDF entry points raise ImportError when netcdf4 is missing."""
    expected_msg = "netcdf4 is not installed. Run 'pip install netcdf4'"

    with pytest.raises(ImportError, match=expected_msg):
        run_to_nc(scm_run.__class__, "ignored")

    with pytest.raises(ImportError, match=expected_msg):
        nc_to_run(scm_run, "ignored")
Example #2
0
def test_nc_read_failure(scm_run, test_data_path, caplog):
    """Reading an incompatible netCDF file raises and logs the failure."""
    bad_file = join(
        test_data_path,
        "netcdf-scm_tas_Amon_bcc-csm1-1_rcp26_r1i1p1_209001-211012.nc",
    )

    with pytest.raises(Exception):
        nc_to_run(scm_run.__class__, bad_file)

    # First log record identifies the failing file at ERROR level.
    logger_name, level, message = caplog.record_tuples[0]
    assert logger_name == "scmdata.netcdf"
    assert level == logging.ERROR
    assert message == "Failed reading netcdf file: {}".format(bad_file)
Example #3
0
def test_nc_with_metadata(scm_run, mdata):
    """Run metadata round-trips through run_to_nc/nc_to_run as netCDF attrs."""

    def _values_equal(left, right):
        # Array-like metadata needs an element-wise comparison.
        if isinstance(left, (list, np.ndarray)):
            return (left == right).all()
        return left == right

    scm_run.metadata = mdata.copy()
    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")

        run_to_nc(scm_run, nc_path, dimensions=("scenario", ))

        # Check the attributes as written to disk.
        ds = nc.Dataset(nc_path)
        written_attrs = {name: ds.getncattr(name) for name in ds.ncattrs()}
        for key, value in mdata.items():
            assert key in written_attrs
            assert _values_equal(written_attrs[key], value)
        assert "created_at" in written_attrs

        # Check the attributes as seen after reading back.
        loaded = nc_to_run(scm_run.__class__, nc_path)

        for key, value in mdata.items():
            assert key in loaded.metadata
            assert _values_equal(loaded.metadata[key], value)

        assert "created_at" in loaded.metadata
Example #4
0
def test_nc_to_run_4d(scm_run):
    """Round-trip a run with extra climate_model and run_id index levels."""
    base = scm_run.timeseries()
    data_cols = base.columns.tolist()
    base = base.reset_index()

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, data_cols] = np.random.rand(base.shape[0], len(data_cols))

    frames = [base]
    for model_name in ["abc_m", "def_m", "ghi_m"]:
        for member in range(10):
            frame = base.copy()
            frame["run_id"] = member
            frame["climate_model"] = model_name
            frame.loc[:, data_cols] = np.random.rand(
                base.shape[0], len(data_cols)
            )
            frames.append(frame)

    scm_run = scm_run.__class__(pd.concat(frames).reset_index(drop=True))

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        run_to_nc(
            scm_run,
            nc_path,
            dimensions=("scenario", "climate_model", "run_id"),
        )

        assert exists(nc_path)

        loaded = nc_to_run(scm_run.__class__, nc_path)
        assert isinstance(loaded, scm_run.__class__)

        assert_scmdf_almost_equal(scm_run, loaded, check_ts_names=False)
Example #5
0
def test_run_to_nc_case(scm_run, v):
    """Variable names keep their exact case through a write/read cycle."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        scm_run = scm_run.filter(variable="Primary Energy")
        scm_run["variable"] = v

        run_to_nc(scm_run, nc_path, dimensions=("scenario", ))
        loaded = nc_to_run(scm_run.__class__, nc_path)

        assert loaded.get_unique_meta("variable", True) == v
Example #6
0
def test_nc_to_run(scm_run, dimensions):
    """A run written with the given dimensions reads back unchanged."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        run_to_nc(scm_run, nc_path, dimensions=dimensions)

        assert exists(nc_path)

        loaded = nc_to_run(scm_run.__class__, nc_path)
        assert isinstance(loaded, scm_run.__class__)

        assert_scmdf_almost_equal(scm_run, loaded, check_ts_names=False)
Example #7
0
def test_nc_to_run_without_dimensions(scm_run):
    """Writing with no dimensions (extras only) still round-trips."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        scm_run = scm_run.filter(scenario="a_scenario2")
        scm_run["run_id"] = [2]
        run_to_nc(scm_run, nc_path, dimensions=(), extras=("run_id", ))

        assert exists(nc_path)

        loaded = nc_to_run(scm_run.__class__, nc_path)
        assert isinstance(loaded, scm_run.__class__)

        assert_scmdf_almost_equal(scm_run, loaded, check_ts_names=False)
Example #8
0
def test_nc_to_run_with_extras_id_needed_and_not_needed(scm_run):
    """Extras either attach to an existing dimension or to the "_id" dimension.

    "paraset_id" is a pure function of "run_id", so it can be stored against
    that dimension directly; "model" varies independently of the declared
    dimensions, so it must be stored against the synthetic "_id" dimension.
    """
    template = scm_run.filter(scenario="a_scenario")

    frames = []
    for model in ("model_a", "model_b"):
        for scenario in ("scenario_a", "scenario_b"):
            for run_id in range(10):
                frame = template.timeseries()
                frame["run_id"] = run_id
                frame["model"] = model
                frame["scenario"] = scenario
                frame.index = frame.index.droplevel(["model", "scenario"])
                frames.append(frame)

    combined = pd.concat(frames)
    scm_run = scm_run.__class__(combined)
    scm_run["paraset_id"] = scm_run["run_id"].apply(lambda r: r // 3)

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")

        run_to_nc(
            scm_run,
            nc_path,
            dimensions=("climate_model", "run_id", "scenario"),
            extras=("paraset_id", "model"),
        )

        assert exists(nc_path)

        written = xr.load_dataset(nc_path)

        # Saved dimensions: "time", "climate_model", "run_id",
        # "scenario" and "_id".
        assert len(written["Primary_Energy"].shape) == 5

        # "model" must be saved against the "_id" dimension.
        assert written["model"].dims == ("_id", )
        # "paraset_id" is wholly defined by "run_id".
        assert written["paraset_id"].dims == ("run_id", )

        loaded = nc_to_run(scm_run.__class__, nc_path)
        assert isinstance(loaded, scm_run.__class__)

        assert_scmdf_almost_equal(scm_run, loaded, check_ts_names=False)
Example #9
0
def test_nc_to_run_with_extras_id_not_needed_sparsity(scm_run):
    """No "_id" dimension is added when an extra is determined by a dimension."""
    base = scm_run.filter(scenario="a_scenario").timeseries()
    data_cols = base.columns.tolist()
    base = base.reset_index()

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, data_cols] = np.random.rand(base.shape[0], len(data_cols))

    frames = [base]
    for model_name in ["abc_m", "def_m", "ghi_m"]:
        for member in range(10):
            frame = base.copy()
            frame["run_id"] = member
            frame["climate_model"] = model_name
            frame.loc[:, data_cols] = np.random.rand(
                base.shape[0], len(data_cols)
            )
            frames.append(frame)

    combined = pd.concat(frames).reset_index(drop=True)
    combined["paraset_id"] = combined["run_id"].apply(lambda r: r // 3)
    scm_run = scm_run.__class__(combined)

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")

        run_to_nc(
            scm_run,
            nc_path,
            dimensions=("climate_model", "run_id"),
            extras=("paraset_id", ),
        )

        assert exists(nc_path)

        written = xr.load_dataset(nc_path)
        # Three dimensions only: "time", "climate_model", "run_id".
        # No "_id" appears because "paraset_id" is uniquely defined by "run_id".
        assert len(written["Primary_Energy"].shape) == 3

        loaded = nc_to_run(scm_run.__class__, nc_path)
        assert isinstance(loaded, scm_run.__class__)

        assert_scmdf_almost_equal(scm_run, loaded, check_ts_names=False)
Example #10
0
def test_nc_to_run_with_extras_sparsity(scm_run):
    """An extra not determined by any dimension forces the "_id" dimension."""
    base = scm_run.timeseries()
    data_cols = base.columns.tolist()
    base = base.reset_index()

    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, data_cols] = np.random.rand(base.shape[0], len(data_cols))

    frames = [base]
    for model_name in ["abc_m", "def_m", "ghi_m"]:
        for member in range(10):
            frame = base.copy()
            frame["run_id"] = member
            frame["climate_model"] = model_name
            frame.loc[:, data_cols] = np.random.rand(
                base.shape[0], len(data_cols)
            )
            frames.append(frame)

    scm_run = scm_run.__class__(pd.concat(frames).reset_index(drop=True))

    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")

        run_to_nc(
            scm_run,
            nc_path,
            dimensions=("climate_model", "run_id"),
            extras=("scenario", ),
        )

        assert exists(nc_path)

        written = xr.load_dataset(nc_path)
        # Four dimensions: "time", "climate_model", "run_id", "_id".
        # The "_id" dimension is the short-hand mapping between extras and
        # the data; this sparsity cannot be avoided here.
        assert len(written["Primary_Energy"].shape) == 4

        loaded = nc_to_run(scm_run.__class__, nc_path)
        assert isinstance(loaded, scm_run.__class__)

        assert_scmdf_almost_equal(scm_run, loaded, check_ts_names=False)
Example #11
0
def test_run_to_nc_weird_name(scm_run, ch, start_with_weird):
    """Leading illegal characters in variable names are rejected by netCDF;
    trailing ones survive a round-trip."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        nc_path = join(tmp_dir, "out.nc")
        scm_run = scm_run.filter(variable="Primary Energy")
        variable = scm_run.get_unique_meta("variable", True)

        if start_with_weird:
            variable = ch + " " + variable
        else:
            variable = variable + " " + ch

        scm_run["variable"] = variable

        if not start_with_weird:
            # Trailing character is legal: write, read back and compare.
            run_to_nc(scm_run, nc_path, dimensions=("scenario", ))
            loaded = nc_to_run(scm_run.__class__, nc_path)

            assert loaded.get_unique_meta("variable", True) == variable
        else:
            # Leading character makes the netCDF variable name illegal.
            error_msg = re.escape("NetCDF: Name contains illegal characters")
            with pytest.raises(RuntimeError, match=error_msg):
                run_to_nc(scm_run, nc_path, dimensions=("scenario", ))