Beispiel #1
0
def test_volumetric_rates():
    """Check the volumetric rates frame produced by a virtual ensemble

    Only the frame returned by the virtual ensemble is inspected here.
    """
    # Fall back to the working directory when __file__ is undefined,
    # which makes it easier to paste this code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    runpaths = testdir + "/data/testensemble-reek001/realization-*/iter-0"

    ensemble = ScratchEnsemble("reektest", runpaths)
    ensemble.load_smry(time_index="yearly", column_keys=["F*"])
    ensemble.load_scalar("npv.txt")
    virtual = ensemble.to_virtual()

    rates = virtual.get_volumetric_rates(column_keys="FOPT", time_index="yearly")
    assert isinstance(rates, pd.DataFrame)
    # FOPT is what we ask for, a FOPR column is what we expect back
    for column in ("REAL", "DATE", "FOPR"):
        assert column in rates
    assert len(rates) == 25
Beispiel #2
0
def test_get_df_merge():
    """Exercise the merge argument of get_df() on a virtual ensemble"""
    # Fall back to the working directory when __file__ is undefined,
    # which makes it easier to paste this code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    runpaths = testdir + "/data/testensemble-reek001/realization-*/iter-0"

    ensemble = ScratchEnsemble("reektest", runpaths)
    ensemble.load_smry(time_index="yearly", column_keys=["F*"])
    ensemble.load_scalar("npv.txt")
    ensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")
    outputs = ensemble.load_txt("outputs.txt")
    virtual = ensemble.to_virtual()

    params = virtual.get_df("parameters.txt")
    n_params = len(params.columns)
    n_outputs = len(outputs.columns)
    n_smry = len(virtual.get_df("unsmry--yearly").columns)

    # REAL is present in both tables, so one column disappears per merge
    smry_with_params = virtual.get_df("unsmry--yearly", merge="parameters")
    assert len(smry_with_params.columns) == n_params + n_smry - 1

    params_with_outputs = virtual.get_df("parameters", merge=["outputs"])
    assert len(params_with_outputs.columns) == n_params + n_outputs - 1

    everything = virtual.get_df("unsmry--yearly", merge=["parameters", "outputs"])
    assert len(everything.columns) == n_smry + n_params + n_outputs - 2

    # Merging with the npv.txt data adds exactly one column, and the
    # column count is the same whichever of the two is the base table:
    params_with_npv = virtual.get_df("parameters", merge="npv.txt")
    assert len(params_with_npv.columns) == n_params + 1
    npv_with_params = virtual.get_df("npv.txt", merge="parameters.txt")
    assert len(npv_with_params.columns) == n_params + 1

    # Inject a mocked FIPNUM to ZONE table into the virtual ensemble
    # and merge the volume table with it. FIPNUM 1..6 cycles twice
    # through the three zone names.
    zone_names = ["UpperReek", "MidReek", "LowerReek"] * 2
    virtual.data["fipnum2zone"] = pd.DataFrame(
        columns=["FIPNUM", "ZONE"],
        data=[[fipnum, zone] for fipnum, zone in enumerate(zone_names, start=1)],
    )
    volumes = virtual.get_df("simulator_volume_fipnum", merge="fipnum2zone")
    for column in ("ZONE", "FIPNUM", "STOIIP_OIL"):
        assert column in volumes
    assert len(volumes["ZONE"].unique()) == 3
def test_manual_aggregation():
    """Compare agg("mean") on an ensemble with a mean that is built
    by hand from the individual realizations"""
    # Fall back to the working directory when __file__ is undefined,
    # which makes it easier to paste this code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    runpaths = testdir + "/data/testensemble-reek001/realization-*/iter-0"

    ensemble = ScratchEnsemble("reektest", runpaths)
    ensemble.load_smry(time_index="yearly", column_keys=["F*"])
    ensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")

    # Let the ensemble do the aggregation itself ...
    aggregated = ensemble.agg("mean")

    # ... and do the same thing by adding up the five ensemble members
    # and scaling the sum. The scalar is kept as the left operand.
    summed = ensemble[0] + ensemble[1] + ensemble[2] + ensemble[3] + ensemble[4]
    by_hand = 1 / 5 * summed

    # Both routes must agree on the mean RMS_SEED parameter
    assert aggregated["parameters"]["RMS_SEED"] == by_hand["parameters"]["RMS_SEED"]
def test_get_smry_meta(tmpdir):
    """Check that smry meta-data survives virtualization of an ensemble,
    and also a dump of the virtual ensemble to disk"""
    # Fall back to the working directory when __file__ is undefined,
    # which makes it easier to paste this code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    runpaths = testdir + "/data/testensemble-reek001/realization-*/iter-0"
    reekensemble = ScratchEnsemble("reekmetatest", runpaths)

    # Without any smry data loaded, there is no meta-data to carry over:
    assert "__smry_metadata" not in reekensemble.to_virtual().keys()

    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    origmeta = reekensemble.get_smry_meta()
    vens = reekensemble.to_virtual()
    assert "__smry_metadata" in vens.keys()

    # The internal storage is reachable through get_df(), which makes
    # its type (a DataFrame) almost a part of the API, so check it:
    assert isinstance(vens.get_df("__smry_metadata"), pd.DataFrame)

    def check_metadict(metadict):
        """Assertions shared by the in-memory and the from-disk meta-data"""
        assert isinstance(metadict, dict)
        # The vens only knows of F* columns, so "*" and "F*" should
        # give the same count. The smry frame has two more columns
        # than there are meta-data entries.
        for pattern in ("*", "F*"):
            smry = vens.get_smry(time_index="yearly", column_keys=pattern)
            assert len(metadict) + 2 == len(smry.columns)
        for vector in ("FOPT", "FWPTH"):
            assert origmeta[vector] == metadict[vector]

    # get_smry_meta() is what users should call to get the meta-data:
    check_metadict(vens.get_smry_meta())

    assert not vens.get_smry_meta([])
    assert vens.get_smry_meta(column_keys="FOPT")["FOPT"] == origmeta["FOPT"]

    assert not vens.get_smry_meta(column_keys="WOPT:NOTEXISTING")

    # The meta-data should also be retrievable after a roundtrip to disk.
    # NOTE(review): the column counts are still taken from the in-memory
    # vens, not from the ensemble loaded from disk - confirm that is intended.
    vens_disk_path = str(tmpdir.join("vens_dumped"))
    vens.to_disk(vens_disk_path)
    disk_vens = VirtualEnsemble(fromdisk=vens_disk_path)
    check_metadict(disk_vens.get_smry_meta())
Beispiel #5
0
def test_todisk_includefile(tmpdir):
    """Test that we can write VirtualEnsembles to the filesystem in a
    retrievable manner with discovered files included

    The virtual ensemble is dumped to "vens_dumped_files" in tmpdir with
    includefiles=True and symlinks=True, and every realization is
    expected to have its UNSMRY file below "__discoveredfiles".
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    reekensemble = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )

    # The relative dump path used below ends up inside tmpdir
    tmpdir.chdir()

    reekensemble.load_smry(time_index="monthly", column_keys="*")
    reekensemble.load_smry(time_index="daily", column_keys="*")
    reekensemble.load_smry(time_index="yearly", column_keys="F*")
    reekensemble.load_smry(column_keys="FOPT")

    reekensemble.load_scalar("npv.txt")
    reekensemble.load_txt("outputs.txt")
    vens = reekensemble.to_virtual()

    vens.to_disk("vens_dumped_files", delete=True, includefiles=True, symlinks=True)
    # Check every realization index 0..4 exactly once (the list used
    # previously contained 4 twice and never looked at realization 3)
    for real in range(5):
        runpath = os.path.join(
            "vens_dumped_files", "__discoveredfiles", "realization-" + str(real)
        )
        assert os.path.exists(runpath)
        assert os.path.exists(
            os.path.join(runpath, "eclipse/model/2_R001_REEK-" + str(real) + ".UNSMRY")
        )
Beispiel #6
0
def test_eclsumcaching():
    """Check when the realizations keep their _eclsum object around

    If many UNSMRY files are loaded and kept by the Python process,
    memory becomes a problem. It is unclear how to verify garbage
    collection from test code, but the _eclsum attribute of each
    realization being None is at least a prerequisite for it.
    """
    # Fall back to the working directory when __file__ is undefined,
    # which makes it easier to paste this code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    ens = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/realization-*/iter-0"
    )

    def eclsums():
        """Collect the _eclsum attribute of every realization"""
        return [real._eclsum for (_, real) in ens.realizations.items()]

    # Each of these caches by default, so nothing is None after the
    # first call. Repeating the call with cache_eclsum=False leaves
    # all of them as None.
    # (cache_eclsum==None is from v1.1.5 no longer equivalent to False)
    for operation in (ens.load_smry, ens.get_smry, ens.get_smry_stats):
        operation()
        assert all(eclsums())

        operation(cache_eclsum=False)
        assert not any(eclsums())

    ens.get_smry_dates()
    assert all(eclsums())

    # The call above has cached the objects, so they are wiped by hand
    # before get_smry_dates() is run without caching:
    for _, real in ens.realizations.items():
        real._eclsum = None

    ens.get_smry_dates(cache_eclsum=False)
    assert not any(eclsums())
Beispiel #7
0
def _load_smry_dataframe_using_fmu(
    ens_path: str, frequency: Optional[Frequency]
) -> pd.DataFrame:
    """Load summary data for an ensemble into a DataFrame through FMU.

    Args:
        ens_path: Passed on as `paths` to ScratchEnsemble.
        frequency: Its `value` is used as time_index for load_smry().
            When it is falsy, "raw" is used instead.

    Returns:
        The summary data with downcast float columns, REAL as int32,
        sorted on REAL and then DATE, and with a fresh index.
    """
    time_index: str = frequency.value if frequency else "raw"

    print(f"## Loading data into DataFrame using FMU  time_index={time_index}...")

    ensemble = ScratchEnsemble("tempEnsName", paths=ens_path)
    smry = _make_date_column_datetime_object(ensemble.load_smry(time_index=time_index))

    # Shrink the frame: floats are downcast by pandas, REAL becomes int32
    float_columns = smry.select_dtypes("float").columns
    smry[float_columns] = smry[float_columns].apply(pd.to_numeric, downcast="float")
    smry["REAL"] = smry["REAL"].astype("int32")

    # Order on realization first and date second, to align with provider
    return smry.sort_values(by=["REAL", "DATE"]).reset_index(drop=True)
Beispiel #8
0
def test_virtual_observations():
    """Build observation objects from a summary vector in virtual
    realizations, and rank the ensemble realizations by their mismatch
    to each of them.
    """
    # Fall back to the working directory when __file__ is undefined,
    # which makes it easier to paste this code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    ens = ScratchEnsemble(
        "test", testdir + "/data/testensemble-reek001/realization-*/iter-0/"
    )
    ens.load_smry(
        column_keys=["FOPT", "FGPT", "FWPT", "FWCT", "FGOR"], time_index="yearly"
    )

    # One aggregated realization for each statistic we want a
    # representative realization for
    virtreals = {
        aggregation + "realization": ens.agg(aggregation)
        for aggregation in ("p90", "mean", "p10")
    }

    summaryvector = "FOPT"
    closest = {}
    for name, virtreal in six.iteritems(virtreals):
        # Start out with an empty observation object, and fill it
        # with the summary vector from the virtual realization
        obs = Observations({})
        obs.load_smry(virtreal, summaryvector, time_index="yearly")

        # Mismatch between each realization in the ensemble and
        # this observation set
        mismatch = obs.mismatch(ens)

        # Sum L2 per realization, the smallest sum is the closest one
        l2_per_real = mismatch.groupby("REAL").sum()["L2"]
        closest[name] = l2_per_real.sort_values().index.values[0]

    for name, expected_real in (
        ("meanrealization", 4),
        ("p90realization", 2),
        ("p10realization", 1),
    ):
        assert closest[name] == expected_real
def test_get_smry_interpolation():
    """Test the summary resampling code for virtual ensembles

    A virtual ensemble made when only yearly summary data was loaded is
    asked for monthly and daily data. The monthly result is compared
    with monthly data obtained from the ScratchEnsemble itself.
    """

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_scalar("npv.txt")
    vens_yearly = reekensemble.to_virtual()
    reekensemble.load_smry(time_index="monthly", column_keys=["F*"])
    # Create a vens that contains both monthly and yearly:
    vens_monthly = reekensemble.to_virtual()
    assert "npv.txt" in vens_monthly.keys()
    reekensemble.load_smry(time_index="daily", column_keys=["F*"])
    _ = reekensemble.to_virtual()  # monthly, yearly *and* daily

    # Resample yearly to monthly:
    monthly = vens_yearly.get_smry(column_keys="FOPT", time_index="monthly")
    assert "FOPT" in monthly.columns
    assert "REAL" in monthly.columns
    assert "DATE" in monthly.columns
    assert len(monthly["REAL"].unique()) == 5

    # 12 months pr. year, including final 1. jan, four years, 5 realizations:
    assert len(monthly) == (12 * 4 + 1) * 5

    # The reference data does not depend on the realization being
    # checked, so it is fetched once instead of once per realization.
    interpolated_by_real = monthly.set_index("REAL")
    true_by_real = reekensemble.get_smry(
        column_keys="FOPT", time_index="monthly").set_index("REAL")

    for realidx in monthly["REAL"].unique():
        int_m = interpolated_by_real.loc[realidx].set_index("DATE")
        true_m = true_by_real.loc[realidx].set_index("DATE")
        difference = int_m["FOPT"] - true_m["FOPT"]

        # The interpolation error should be zero at each 1st of January
        # but most likely nonzero elsewhere (at least for these realization).
        # abs() is needed on the January checks as well, otherwise any
        # negative error, however large, would be accepted.
        assert abs(difference.loc["2001-01-01"]) < 0.0001
        assert abs(difference.loc["2001-06-01"]) > 0
        assert abs(difference.loc["2002-01-01"]) < 0.0001
        assert abs(difference.loc["2002-06-01"]) > 0
        assert abs(difference.loc["2003-01-01"]) < 0.0001

    daily = vens_yearly.get_smry(column_keys=["FOPT", "FOPR"],
                                 time_index="daily")
    assert "FOPT" in daily.columns
    assert "REAL" in daily.columns
    assert "DATE" in daily.columns
    assert len(daily["REAL"].unique()) == 5
    assert len(daily) == (365 * 4 + 2) * 5  # 2003-01-01 and 2003-01-02 at end

    # Linear interpolation will give almost unique values everywhere:
    assert len(daily["FOPT"].unique()) > (365 * 4) * 5
    # While bfill for rates cannot be more unique than the yearly input
    assert len(daily["FOPR"].unique()) < 4 * 5  # Must be less than the numbers
Beispiel #10
0
def test_noparameters():
    """Check how an ensemble behaves once parameters.txt is removed"""
    here = os.path.dirname(os.path.abspath(__file__))
    ensemble = ScratchEnsemble(
        "reektest", here + "/data/testensemble-reek001/realization-*/iter-0"
    )

    # The parameters are there to begin with ...
    assert not ensemble.parameters.empty
    # ... and gone after removing parameters.txt from the ensemble:
    ensemble.remove_data("parameters.txt")
    assert ensemble.parameters.empty

    # Asking explicitly for parameters.txt must then raise a KeyError:
    with pytest.raises(KeyError):
        ensemble.get_df("parameters.txt")

    # Summary data is still available, but it can not be merged
    # with the parameters that were removed:
    ensemble.load_smry(time_index="yearly", column_keys="FOPT")
    yearly = ensemble.get_df("unsmry--yearly")
    assert not yearly.empty
    with pytest.raises(KeyError):
        ensemble.get_df("unsmry--yearly", merge="parameters.txt")
Beispiel #11
0
def test_filedescriptors():
    """Check that loading summary data does not leave file descriptors open

    The lazy_load option to EclSum is relevant here. With lazy_load=True
    (the default) the file descriptors are not closed. To be able to
    open thousands of smry files they must be closed whenever possible,
    which is why lazy_load should be False in realization.py.

    The test counts the entries in /proc/<pid>/fd, and does nothing
    when that directory does not exist."""
    # Fall back to the working directory when __file__ is undefined,
    # which makes it easier to paste this code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )

    fd_dir = "/proc/{}/fd".format(os.getpid())
    if not os.path.exists(fd_dir):
        print("Counting file descriptors on non-Linux not supported")
        return

    def count_open_fds():
        """Number of entries currently listed in the fd directory"""
        return len(os.listdir(fd_dir))

    fds_before = count_open_fds()
    reekensemble = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/realization-*/iter-0"
    )
    reekensemble.load_smry()
    del reekensemble
    fds_after = count_open_fds()

    # Counting also after construction and after load_smry() should give
    # 5, 5, 5, 5 as long as lazy_load=False. With lazy_load=True the
    # pattern is 15, 15, 25, 20 (the last number reveals a bug in EclSum
    # that is now fixed).
    assert fds_before == fds_after
Beispiel #12
0
def _dump_smry_to_csv_using_fmu(ens_path: str, time_index: str,
                                output_csv_file: str) -> None:
    """Load summary data for an ensemble through FMU and write it as CSV.

    The frame is sorted on DATE and then REAL before it is written,
    without its index, to output_csv_file. The shape of the frame and
    its unique dates and realizations are printed along the way.

    Args:
        ens_path: Passed on as `paths` to ScratchEnsemble.
        time_index: Passed on to load_smry().
        output_csv_file: Where to write the CSV file.
    """
    ensemble = ScratchEnsemble("tempEnsName", paths=ens_path)
    smry = ensemble.load_smry(time_index=time_index).sort_values(["DATE", "REAL"])

    print("Dataframe shape::", smry.shape)

    for header, column in (
        ("Num unique dates:", "DATE"),
        ("Num unique reals:", "REAL"),
    ):
        distinct = smry[column].unique()
        print(header, len(distinct))
        print(distinct)

    smry.to_csv(output_csv_file, index=False)
Beispiel #13
0
def test_ens_premature_ecl(tmpdir):
    """Check an ensemble where Eclipse has failed early in realization 1

    A copy of the test ensemble is made in tmpdir, where the UNSMRY file
    of realization 1 is replaced by its "-failed2000" variant. The copy
    is compared to the original ensemble with respect to raw and
    resampled summary data, statistics and delta ensembles, both with
    and without the failed realization filtered away.
    """
    if "__file__" in globals():
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    origensemble = ScratchEnsemble(
        "origreek", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )
    raw_orig_smry = origensemble.load_smry()
    # Copy the ensemble to tmpdir so we can replace the UNSMRY file in real 1:
    tmpdir.chdir()

    shutil.copytree(testdir + "/data/testensemble-reek001", "ens_fail_real_reek001")
    unsmry_filename = (
        "ens_fail_real_reek001/realization-1/"
        + "iter-0/eclipse/model/2_R001_REEK-1.UNSMRY"
    )
    # Overwrite the UNSMRY file with the "-failed2000" file next to it
    shutil.copy(unsmry_filename + "-failed2000", unsmry_filename)

    failensemble = ScratchEnsemble(
        "failedreek", "ens_fail_real_reek001/realization-*/iter-0"
    )
    raw_fail_smry = failensemble.load_smry()

    # This is usually superfluous when raw datetimes are obtained.
    raw_orig_smry["DATE"] = pd.to_datetime(raw_orig_smry["DATE"])
    raw_fail_smry["DATE"] = pd.to_datetime(raw_fail_smry["DATE"])

    # Homogeneous max-date in orig smry:
    assert len(raw_orig_smry.groupby("REAL").max()["DATE"].unique()) == 1
    # Different values for raw_fail:
    assert len(raw_fail_smry.groupby("REAL").max()["DATE"].unique()) == 2
    # END statement in schedule file on 2000-08-01 yields this:
    assert (
        str(raw_fail_smry.groupby("REAL").max()["DATE"].loc[1]) == "2000-08-01 00:00:00"
    )

    # Filter away all those that did not make it to the end. In normal scenarios,
    # this would be accomplished by .filter('OK'), but not in this test context.
    max_date = str(failensemble.get_smry()["DATE"].max())
    filtered_fail_ensemble = failensemble.filter(
        "unsmry--raw", column="DATE", columncontains=max_date, inplace=False
    )
    assert len(filtered_fail_ensemble) == 4
    assert (
        len(filtered_fail_ensemble.get_smry().groupby("REAL").max()["DATE"].unique())
        == 1
    )
    # Check also get_smry():
    assert len(failensemble.get_smry().groupby("REAL").max()["DATE"].unique()) == 2

    # With time_index set to something, then all realization will get
    # interpolated onto the same date range
    assert (
        len(
            failensemble.get_smry(time_index="monthly")
            .groupby("REAL")
            .max()["DATE"]
            .unique()
        )
        == 1
    )
    # This is in fact *different* from what you would get from load_smry (issue #97)
    assert (
        len(
            failensemble.load_smry(time_index="monthly")
            .groupby("REAL")
            .max()["DATE"]
            .unique()
        )
        == 2
    )
    # (this behaviour might change, get_smry() is allowed in
    # the future to mimic load_smry())

    # Check that FOPT is very much lower in real 1 in failed ensemble:
    assert (
        failensemble.get_smry(column_keys="FOPT", time_index="monthly")
        .groupby("REAL")
        .max()["FOPT"]
        .loc[1]
        < 1500000
    )
    assert (
        origensemble.get_smry(column_keys="FOPT", time_index="monthly")
        .groupby("REAL")
        .max()["FOPT"]
        .loc[1]
        > 6000000
    )

    # Also for yearly
    assert (
        failensemble.get_smry(column_keys="FOPT", time_index="yearly")
        .groupby("REAL")
        .max()["FOPT"]
        .loc[1]
        < 1500000
    )
    assert (
        origensemble.get_smry(column_keys="FOPT", time_index="yearly")
        .groupby("REAL")
        .max()["FOPT"]
        .loc[1]
        > 6000000
    )

    fail_foprs = failensemble.get_smry(column_keys="FOPR", time_index="monthly")

    # The FOPR rate vector should be all zero after the stop
    assert (
        fail_foprs[
            (fail_foprs["REAL"] == 1) & (fail_foprs["DATE"] > datetime.date(2000, 8, 1))
        ]["FOPR"]
        .abs()
        .sum()
        == 0
    )
    # while realization 0, which is untouched, has nonzero rates after that date
    assert (
        fail_foprs[
            (fail_foprs["REAL"] == 0) & (fail_foprs["DATE"] > datetime.date(2000, 8, 1))
        ]["FOPR"]
        .abs()
        .sum()
        > 0
    )

    # This frame treats the "failed" realization as correct,
    # and it will affect the stats:
    fail_stats = failensemble.get_smry_stats(time_index="monthly")
    # Here, real 1 is removed
    filtered_stats = filtered_fail_ensemble.get_smry_stats(time_index="monthly")
    # Original stats
    orig_stats = origensemble.get_smry_stats(time_index="monthly")

    # The 30 last rows are the rows from 2000-09-01 to 2003-02-01:
    assert fail_stats.loc["minimum"]["FOPR"].iloc[-30:].abs().sum() == 0
    assert fail_stats.loc["minimum"]["FOPT"].iloc[-30:].unique()[0] == 1431247.125
    # Oh no, in filtered stats, the last date 2003-02-01 is
    # not included, probably a minor bug!
    # But that means that the indexing of the last 30 is a little bit rogue.
    # (this test should work even if that bug is fixed)
    assert filtered_stats.loc["minimum"]["FOPR"].iloc[-29:].abs().sum() > 0
    assert len(filtered_stats.loc["minimum"]["FOPT"].iloc[-29:].unique()) == 29

    # Mean FOPR and FOPT should be affected by the zero-padded rates:
    assert (
        fail_stats.loc["mean"].iloc[-10]["FOPR"]
        < filtered_stats.loc["mean"].iloc[-10]["FOPR"]
    )
    assert (
        fail_stats.loc["mean"].iloc[-10]["FOPR"]
        < orig_stats.loc["mean"].iloc[-10]["FOPR"]
    )
    assert (
        fail_stats.loc["mean"].iloc[-10]["FOPT"]
        < filtered_stats.loc["mean"].iloc[-10]["FOPT"]
    )
    assert (
        fail_stats.loc["mean"].iloc[-10]["FOPT"]
        < orig_stats.loc["mean"].iloc[-10]["FOPT"]
    )

    # Delta profiles:
    delta_fail = origensemble - failensemble
    # Delta profiles are given for all realizations
    delta_fail_smry = delta_fail.get_smry()
    assert len(delta_fail_smry["REAL"].unique()) == 5
    # and they all end at the same ultimate date:
    assert len(delta_fail_smry.groupby("REAL").max()["DATE"].unique()) == 1
    # BUT, there is only NaNs for values after 2000-08-01:
    assert np.isnan(
        delta_fail_smry[
            (delta_fail_smry["REAL"] == 1) & (delta_fail_smry["DATE"] > "2000-08-01")
        ]["FOPT"].unique()[0]
    )

    # Delta profiles after filtering:
    delta_filtered = origensemble - filtered_fail_ensemble
    # The subtraction should leave both operands with their realizations intact
    assert len(origensemble) == 5
    assert len(filtered_fail_ensemble) == 4
    # assert len(delta_filtered) == 4  # Only four realizations (requires #83 resolved)
    # to_virtual() and time_index can be removed when #83 is finished.
    delta_filtered_smry = delta_filtered.to_virtual().get_smry(time_index="monthly")
    # Should contain only four realizations, as one has been filtered away
    assert len(delta_filtered_smry["REAL"].unique()) == 4
    # Ultimate date is the same in all four:
    assert len(delta_filtered_smry.groupby("REAL").max()["DATE"].unique()) == 1
def test_ensemble_aggregations(tmpdir):
    """Check aggregation of an ensemble (mean, median, min, max and
    percentiles), where each aggregation gives a virtual realization"""
    # Fall back to the working directory when __file__ is undefined,
    # which makes it easier to paste this code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )

    reekensemble = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/realization-*/iter-0"
    )
    reekensemble.load_smry(time_index="monthly", column_keys=["F*"])
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")
    reekensemble.load_scalar("npv.txt", convert_numeric=True)

    # p10 is the low estimate and p90 is the high estimate
    stats = {
        aggregation: reekensemble.agg(aggregation)
        for aggregation in ("mean", "median", "min", "max", "p10", "p90")
    }

    tmpdir.chdir()
    for aggregation in ("min", "max", "mean"):
        stats[aggregation].to_disk("virtreal_" + aggregation, delete=True)

    def rms_seed(aggregation):
        """The RMS_SEED parameter in one of the aggregated realizations"""
        return stats[aggregation]["parameters.txt"]["RMS_SEED"]

    assert rms_seed("min") < rms_seed("max")

    # The statistics must come in their natural order
    for lower, upper in (
        ("min", "p10"),
        ("p10", "median"),
        ("median", "p90"),
        ("p90", "max"),
        ("min", "mean"),
        ("min", "max"),
    ):
        assert rms_seed(lower) <= rms_seed(upper)

    final_min_fopt = stats["min"]["unsmry--monthly"]["FOPT"].iloc[-1]
    final_max_fopt = stats["max"]["unsmry--monthly"]["FOPT"].iloc[-1]
    assert final_min_fopt < final_max_fopt

    # Row 2 of the volume table (said to correspond to FIPNUM=3).
    # The first check is by position and the second one by label.
    min_volumes = stats["min"]["simulator_volume_fipnum"]
    mean_volumes = stats["mean"]["simulator_volume_fipnum"]
    max_volumes = stats["max"]["simulator_volume_fipnum"]
    assert min_volumes.iloc[2]["STOIIP_OIL"] < mean_volumes.iloc[2]["STOIIP_OIL"]
    assert mean_volumes.loc[2]["STOIIP_OIL"] < max_volumes.loc[2]["STOIIP_OIL"]

    # STATUS is aggregated too. As min and max are defined also for
    # string columns, the data available varies with the aggregation.
    # Job 49 is the Eclipse forward model.
    p10_duration = stats["p10"]["STATUS"].iloc[49]["DURATION"]
    max_duration = stats["max"]["STATUS"].iloc[49]["DURATION"]
    assert p10_duration < max_duration

    assert "npv.txt" in stats["mean"].keys()
    assert stats["mean"]["npv.txt"] == 3382.5

    # Leaving out keys, either by excluding them or by listing others
    assert "STATUS" not in reekensemble.agg("mean", excludekeys="STATUS").keys()
    assert "STATUS" not in reekensemble.agg("mean", keylist=["parameters.txt"]).keys()

    p01_seed = reekensemble.agg("p01")["parameters"]["RMS_SEED"]
    p99_seed = reekensemble.agg("p99")["parameters"]["RMS_SEED"]
    assert p01_seed < p99_seed

    # An aggregation that is not known is refused
    with pytest.raises(ValueError):
        reekensemble.agg("foobar")

    # excludekeys and keylist accept both a string and a list of strings
    for excluded in ("parameters.txt", ["parameters.txt"]):
        aggregated = reekensemble.agg("mean", excludekeys=excluded)
        assert "parameters.txt" not in aggregated.keys()
    for included in ("STATUS", ["STATUS"]):
        aggregated = reekensemble.agg("mean", keylist=included)
        assert "parameters.txt" not in aggregated.keys()

    # Keys to include may be given in shorthand form, but what comes
    # back is the fully qualified path
    for included in ("unsmry--yearly", ["unsmry--yearly"]):
        aggregated = reekensemble.agg("mean", keylist=included)
        assert "share/results/tables/unsmry--yearly.csv" in aggregated.keys()

    yearly_only = reekensemble.agg("mean", keylist="unsmry--yearly")
    assert isinstance(yearly_only.get_df("unsmry--yearly"), pd.DataFrame)
Beispiel #15
0
def test_filter():
    """Check that filter() drops realizations failing the tested conditions

    Exercises filtering on file presence, on keys and values in
    parameters.txt, and on column presence and column content in
    tabular data (STATUS and the raw summary dataframe)."""

    # __file__ is undefined when this code is pasted into an interactive
    # session; use the current working directory in that case.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    dirs = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", dirs)

    # Only the presence of a STATUS file in each realization is required
    status_filtered = reekensemble.filter("STATUS")
    assert len(status_filtered) == 5

    # A string value should match numeric data:
    reekensemble.filter(
        "parameters.txt", key="RMS_SEED", value="723121249", inplace=True
    )
    assert len(reekensemble) == 2

    # (pylint reports a false positive on the subscripting below)
    mean_parameters = reekensemble.agg("mean")["parameters"]
    assert mean_parameters["RMS_SEED"] == 723121249

    # The same filter with a numeric value, on a fresh ensemble:
    reekensemble = ScratchEnsemble("reektest", dirs)
    reekensemble.filter(
        "parameters.txt", key="RMS_SEED", value=723121249, inplace=True
    )
    assert len(reekensemble) == 2
    mean_parameters = reekensemble.agg("mean")["parameters"]
    assert mean_parameters["RMS_SEED"] == 723121249

    reekensemble = ScratchEnsemble("reektest", dirs)
    filtered = reekensemble.filter("parameters.txt", key="FOO", inplace=False)
    # (NaN in one of the parameters.txt is True in this context)
    assert len(filtered) == 2

    filtered = reekensemble.filter(
        "parameters.txt", key="MULTFLT_F1", value=0.001, inplace=False
    )
    assert len(filtered) == 4

    # The numeric and the string form of FWL select the same subset
    for fwl in (1700, "1700"):
        fwl_subset = reekensemble.filter(
            "parameters.txt", key="FWL", value=fwl, inplace=False
        )
        assert len(fwl_subset) == 3

    # Tricky case that is left untested: filtering FOO on value=''
    # (the empty string) should correspond to missing data, but this
    # is NOT IMPLEMENTED.

    # With no value given, the key only has to be present
    foo_present = reekensemble.filter("parameters.txt", key="FOO", inplace=False)
    assert len(foo_present) == 2

    # 'key' and 'value' are not accepted for things that are tables.
    for table_kwargs in ({"key": "ECLIPSE"}, {"value": "ECLIPSE"}):
        with pytest.raises(ValueError):
            reekensemble.filter("STATUS", **table_kwargs)

    # Column presence:
    assert len(reekensemble.filter("STATUS", column="FORWARD_MODEL")) == 5
    with_column = reekensemble.filter(
        "STATUS", column="FORWARD_MODEL", inplace=False
    )
    assert len(with_column) == 5
    assert not reekensemble.filter("STATUS", column="FOOBAR", inplace=False)
    with pytest.raises(ValueError):
        reekensemble.filter("STATUS", wrongarg="FOOBAR", inplace=False)

    # Column content in STATUS:
    eclipse_2014 = reekensemble.filter(
        "STATUS", column="FORWARD_MODEL", columncontains="ECLIPSE100_2014.2"
    )
    assert len(eclipse_2014) == 5
    eclipse_2010 = reekensemble.filter(
        "STATUS",
        column="FORWARD_MODEL",
        columncontains="ECLIPSE100_2010.2",
        inplace=False,
    )
    assert not eclipse_2010

    # Column presence and content in the raw summary data:
    reekensemble.load_smry()
    smry = "unsmry--raw"
    assert len(reekensemble.filter(smry)) == 5
    assert len(reekensemble.filter(smry, column="FOPT")) == 5
    assert not reekensemble.filter(smry, column="FOOBAR", inplace=False)
    assert len(reekensemble.filter(smry, column="FOPT", columncontains=0)) == 5
    assert not reekensemble.filter(
        smry, column="FOPT", columncontains=-1000, inplace=False
    )
    # Float and integer representations of the same FOPT value
    for fopt in (6025523.0, 6025523):
        fopt_subset = reekensemble.filter(
            smry, column="FOPT", columncontains=fopt, inplace=False
        )
        assert len(fopt_subset) == 1

    # Strings are not supported (not yet) for numeric columns, so
    # columncontains='6025523.0' is not tested.

    # Dates given as strings, with or without a midnight timestamp
    for datestr in ("2002-11-25", "2002-11-25 00:00:00"):
        date_subset = reekensemble.filter(
            smry, column="DATE", columncontains=datestr, inplace=False
        )
        assert len(date_subset) == 5
    assert not reekensemble.filter(
        smry,
        column="DATE",
        columncontains="2002-11-25 00:00:01",
        inplace=False,
    )
    timestamp_subset = reekensemble.filter(
        smry,
        column="DATE",
        columncontains="2000-01-07 02:26:15",
        inplace=False,
    )
    assert len(timestamp_subset) == 3
    assert not reekensemble.filter(
        smry, column="DATE", columncontains="2000-01-07", inplace=False
    )
Beispiel #16
0
def test_ensemble_ecl():
    """Test the Eclipse specific functionality of ScratchEnsemble

    Covers summary key lookup, loading and caching of summary data,
    date list generation, well and group names, ensemble deltas and
    summary statistics (including user supplied quantiles).
    """

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )

    # Eclipse summary keys:
    assert len(reekensemble.get_smrykeys("FOPT")) == 1
    assert len(reekensemble.get_smrykeys("F*")) == 49
    assert len(reekensemble.get_smrykeys(["F*", "W*"])) == 49 + 280
    assert not reekensemble.get_smrykeys("BOGUS")

    # Load once with default column_keys. The returned frame is not
    # inspected; it is replaced by the column-restricted load below.
    reekensemble.load_smry(time_index="monthly")

    # reading ensemble dataframe
    monthly = reekensemble.load_smry(column_keys=["F*"], time_index="monthly")
    assert monthly.columns[0] == "REAL"  # Enforce order of columns.
    assert monthly.columns[1] == "DATE"
    assert len(monthly) == 190
    # Check that the result was cached in memory, not necessarily on disk..
    assert isinstance(reekensemble.get_df("unsmry--monthly.csv"), pd.DataFrame)

    assert len(reekensemble.keys()) == 4

    # When asking the ensemble for FOPR, we also get REAL as a column
    # in return. Note that the internal stored version will be
    # overwritten by each load_smry()
    assert len(reekensemble.load_smry(column_keys=["FOPR"]).columns) == 3
    assert len(reekensemble.load_smry(column_keys=["FOP*"]).columns) == 11
    assert len(reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).columns) == 12

    # Check that there is now a cached version with raw dates:
    assert isinstance(reekensemble.get_df("unsmry--raw.csv"), pd.DataFrame)
    # The columns are not similar, this is allowed!

    # If you get 3205 here, it means that you are using the union of
    # raw dates from all realizations, which is not correct
    assert len(reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).index) == 1700

    # Date list handling:
    assert len(reekensemble.get_smry_dates(freq="report")) == 641
    assert len(reekensemble.get_smry_dates(freq="raw")) == 641
    assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
    assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
    assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
    assert len(reekensemble.get_smry_dates(freq="last")) == 1
    assert reekensemble.get_smry_dates(freq="last") == reekensemble.get_smry_dates(
        freq="last", end_date="2050-02-01"
    )

    assert str(reekensemble.get_smry_dates(freq="report")[-1]) == "2003-01-02 00:00:00"
    assert str(reekensemble.get_smry_dates(freq="raw")[-1]) == "2003-01-02 00:00:00"
    assert str(reekensemble.get_smry_dates(freq="yearly")[-1]) == "2004-01-01"
    assert str(reekensemble.get_smry_dates(freq="monthly")[-1]) == "2003-02-01"
    assert str(reekensemble.get_smry_dates(freq="daily")[-1]) == "2003-01-02"
    assert str(reekensemble.get_smry_dates(freq="last")[-1]) == "2003-01-02"

    assert (
        str(reekensemble.get_smry_dates(freq="daily", end_date="2002-03-03")[-1])
        == "2002-03-03"
    )
    assert (
        str(reekensemble.get_smry_dates(freq="daily", start_date="2002-03-03")[0])
        == "2002-03-03"
    )

    # Start and end outside of orig data and on the "wrong side"
    dates = reekensemble.get_smry_dates(end_date="1999-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "1999-03-03"

    dates = reekensemble.get_smry_dates(start_date="2099-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "2099-03-03"

    # Time interpolated dataframes with summary data:
    yearly = reekensemble.get_smry_dates(freq="yearly")
    assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index=yearly)) == 25
    # NB: This is cached in unsmry-custom.csv, not unsmry--yearly!
    # This usage is discouraged. Use 'yearly' in such cases.

    # Check that we can shortcut get_smry_dates:
    assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index="yearly")) == 25

    assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="last")) == 5
    assert isinstance(reekensemble.get_df("unsmry--last.csv"), pd.DataFrame)

    # Eclipse well names list
    assert len(reekensemble.get_wellnames("OP*")) == 5
    assert len(reekensemble.get_wellnames(None)) == 8
    assert len(reekensemble.get_wellnames()) == 8
    assert not reekensemble.get_wellnames("")
    assert len(reekensemble.get_wellnames(["OP*", "WI*"])) == 8

    # eclipse well groups list
    assert len(reekensemble.get_groupnames()) == 3

    # delta between two ensembles
    diff = reekensemble - reekensemble
    assert len(diff.get_smry(column_keys=["FOPR", "FGPR", "FWCT"]).columns) == 5

    # eclipse summary vector statistics for a given ensemble
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOPR", "FGPR"], time_index="monthly"
    )
    assert isinstance(df_stats, pd.DataFrame)
    assert len(df_stats.columns) == 2
    assert isinstance(df_stats["FOPR"]["mean"], pd.Series)
    assert len(df_stats["FOPR"]["mean"].index) == 38

    # check if wild cards also work for get_smry_stats
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOP*", "FGP*"], time_index="monthly"
    )
    assert len(df_stats.columns) == len(reekensemble.get_smrykeys(["FOP*", "FGP*"]))

    # Check webviz requirements for dataframe
    stats = df_stats.index.levels[0]
    assert "minimum" in stats
    assert "maximum" in stats
    assert "p10" in stats
    assert "p90" in stats
    assert "mean" in stats
    assert df_stats["FOPR"]["minimum"].iloc[-2] < df_stats["FOPR"]["maximum"].iloc[-2]

    # Check user supplied quantiles
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOPT"], time_index="yearly", quantiles=[0, 15, 50, 85, 100]
    )
    statistics = df_stats.index.levels[0]
    assert "p0" in statistics
    assert "p15" in statistics
    assert "p50" in statistics
    assert "p85" in statistics
    assert "p100" in statistics

    # For oil industry, p15 on FOPT should yield a larger value than p85.
    # But the quantiles we get out follows the rest of the world
    # so we check for the opposite.
    # Use .iloc for the last row: a plain [-1] on a Series that is not
    # integer-indexed depends on pandas' deprecated positional fallback.
    assert df_stats["FOPT"]["p85"].iloc[-1] > df_stats["FOPT"]["p15"].iloc[-1]

    with pytest.raises(ValueError):
        reekensemble.get_smry_stats(
            column_keys=["FOPT"], time_index="yearly", quantiles=["foobar"]
        )

    # With an empty quantile list, three statistics remain
    noquantiles = reekensemble.get_smry_stats(
        column_keys=["FOPT"], time_index="yearly", quantiles=[]
    )
    assert len(noquantiles.index.levels[0]) == 3
Beispiel #17
0
def test_get_df():
    """Test the data retrieval functionality

    get_df() in the ensemble context is an aggregator, that will aggregate
    data from individual realizations to the ensemble level, with
    optional merging capabilities performed on realization level."""
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ens = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )
    smry = ens.load_smry(column_keys="FO*", time_index="yearly")

    # The same frame is reachable through shorthand and full paths,
    # with or without the .csv extension:
    assert not ens.get_df("unsmry--yearly").empty
    assert not ens.get_df("unsmry--yearly.csv").empty
    assert not ens.get_df("share/results/tables/unsmry--yearly").empty
    assert not ens.get_df("share/results/tables/unsmry--yearly.csv").empty

    # Monthly data is not available until it has been loaded:
    with pytest.raises(KeyError):
        ens.get_df("unsmry--monthly")
    ens.load_smry(column_keys="FO*", time_index="monthly")
    assert not ens.get_df("unsmry--monthly").empty
    with pytest.raises(KeyError):
        # Single dash, this name does not match the loaded frame
        ens.get_df("unsmry-monthly")

    # Tests that we can do merges directly:
    params = ens.get_df("parameters.txt")
    smryparams = ens.get_df("unsmry--yearly", merge="parameters")
    # The set union is to handle the REAL column present in both smry and params:
    assert len(smryparams.columns) == len(set(smry.columns).union(params.columns))

    # Test multiple merges:
    outputs = ens.load_txt("outputs.txt")
    assert len(
        ens.get_df("unsmry--yearly", merge=["parameters", "outputs.txt"]).columns
    ) == len(set(smry.columns).union(params.columns).union(outputs.columns))

    # Try merging dataframes:
    ens.load_csv("share/results/volumes/simulator_volume_fipnum.csv")

    # Inject a mocked dataframe to the realization, there is
    # no "add_data" API for ensembles, but we can use the apply()
    # functionality
    def fipnum2zone():
        """Helper function for injecting mocked frame into
        each realization"""
        return pd.DataFrame(
            columns=["FIPNUM", "ZONE"],
            data=[
                [1, "UpperReek"],
                [2, "MidReek"],
                [3, "LowerReek"],
                [4, "UpperReek"],
                [5, "MidReek"],
                [6, "LowerReek"],
            ],
        )

    ens.apply(fipnum2zone, localpath="fipnum2zone")
    volframe = ens.get_df("simulator_volume_fipnum", merge="fipnum2zone")

    assert "ZONE" in volframe
    assert "FIPNUM" in volframe
    assert "STOIIP_OIL" in volframe
    assert len(volframe["ZONE"].unique()) == 3

    # Merge with scalar data:
    ens.load_scalar("npv.txt")
    vol_npv = ens.get_df("simulator_volume_fipnum", merge="npv.txt")
    # (this particular data combination does not really make sense)
    assert "STOIIP_OIL" in vol_npv
    assert "npv.txt" in vol_npv
Beispiel #18
0
def test_virtualensemble():
    """Verify the behaviour of the object returned by ScratchEnsemble.to_virtual()

    Checks the manifest, stored dataframes, shorthand paths, summary
    retrieval and statistics, realization and data removal, appending
    of new data and aggregation."""
    # __file__ is undefined when this code is pasted into an interactive
    # session; use the current working directory in that case.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    realization_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"

    manifest = {
        "what": "A test ensemble for pytest usage",
        "coordinate_system": "The correct one",
    }
    reekensemble = ScratchEnsemble("reektest", realization_glob, manifest=manifest)

    # Load summary data at yearly, raw (default) and custom time indices
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_smry(column_keys=["FOPT", "FOIP"])
    custom_dates = [
        datetime.strptime(strdate, "%Y-%m-%d %H:%M:%S")
        for strdate in ["2000-05-03 23:15:00", "2002-04-02 15:34:23"]
    ]
    reekensemble.load_smry(column_keys=["FGPT"], time_index=custom_dates)
    reekensemble.load_scalar("npv.txt")
    reekensemble.load_txt("outputs.txt")
    vens = reekensemble.to_virtual()

    assert "coordinate_system" in vens.manifest

    # Assigning a new manifest replaces the old one entirely:
    vens.manifest = {"foo": "bar"}
    assert "foo" in vens.manifest
    assert "coordinate_system" not in vens.manifest

    # Data should be present for all 5 realizations
    for smrykey in ("unsmry--yearly", "unsmry--raw", "unsmry--custom"):
        assert len(vens[smrykey]["REAL"].unique()) == 5
    assert len(vens["parameters.txt"]) == 5

    assert not vens.lazy_keys()

    # The dataframe of discovered files in the ScratchRealization
    discovered = vens["__files"]
    assert isinstance(discovered, pd.DataFrame)
    assert not vens["__files"].empty

    assert "REAL" in vens["STATUS"].columns

    # Shorthand names resolve to the fully qualified path:
    for shorthand in ("unsmry--yearly", "unsmry--yearly.csv"):
        fullpath = vens.shortcut2path(shorthand)
        assert fullpath == "share/results/tables/unsmry--yearly.csv"

    assert "npv.txt" in vens.keys()
    assert len(vens["npv.txt"]) == 5  # includes the 'error!' string in real4
    assert "outputs.txt" in vens.keys()
    assert len(vens["outputs.txt"]) == 4

    # get_smry() on data where no interpolation is necessary
    fopt = vens.get_smry(column_keys=["FOPT"], time_index="yearly")
    for expected_column in ("FOPT", "DATE", "REAL"):
        assert expected_column in fopt.columns
    assert "FGPT" not in fopt.columns
    assert len(fopt) == 25

    # 'first' and 'last' should correspond to the rows of the raw data
    # at the smallest and largest date respectively
    raw_smry = vens.get_smry(time_index="raw")
    for time_index, pick_date in (("first", min), ("last", max)):
        pd.testing.assert_series_equal(
            vens.get_smry(time_index=time_index)["FOIP"].reset_index(drop=True),
            raw_smry[raw_smry["DATE"] == pick_date(raw_smry["DATE"])][
                "FOIP"
            ].reset_index(drop=True),
        )

    # get_smry() with all defaults should glob to all columns and
    # monthly time frequency. The 'monthly' is interpolated from the
    # 'raw', as it is most likely finer resolution than 'yearly'
    alldefaults = vens.get_smry()
    assert len(alldefaults) == 185
    assert len(alldefaults.columns) == 4
    # An explicit monthly request behaves like the default
    monthly_smry = vens.get_smry(time_index="monthly")
    assert len(monthly_smry) == 185
    assert len(monthly_smry.columns) == 4

    # get_smry(time_index='raw') should equal get_smry(time_index=None)
    raw_fopt = vens.get_smry(time_index="raw")["FOPT"].reset_index(drop=True)
    none_fopt = vens.get_smry(time_index=None)["FOPT"].reset_index(drop=True)
    pd.testing.assert_series_equal(raw_fopt, none_fopt)

    # The custom smry holds two dates
    assert len(vens.get_smry(time_index="custom")["DATE"].unique()) == 2

    # Eclipse summary vector statistics for a given ensemble
    df_stats = vens.get_smry_stats(column_keys=["FOPR", "FGPR"], time_index="yearly")
    assert isinstance(df_stats, pd.DataFrame)
    assert len(df_stats.columns) == 2
    fopr_mean = df_stats["FOPR"]["mean"]
    assert isinstance(fopr_mean, pd.Series)
    assert len(df_stats["FOPR"]["mean"]) == 5

    # Webviz requirements for the statistics dataframe
    stats = df_stats.index.levels[0]
    for statname in ("minimum", "maximum", "p10", "p90", "mean"):
        assert statname in stats
    assert df_stats["FOPR"]["minimum"].iloc[-2] < df_stats["FOPR"]["maximum"].iloc[-2]

    # Retrieval of a single realization:
    vreal = vens.get_realization(2)
    assert len(vreal.keys()) == len(vens.keys())
    assert set(vreal.keys()) == set(vens.keys())  # Order is not preserved

    # Removal of a realization:
    vens.remove_realizations(3)
    assert len(vens.parameters["REAL"].unique()) == 4
    assert len(vens) == 4
    vens.remove_realizations(3)  # This will give warning
    assert len(vens.parameters["REAL"].unique()) == 4
    assert len(vens["unsmry--yearly"]["REAL"].unique()) == 4
    assert len(vens) == 4

    # Removal of data:
    vens.remove_data("parameters.txt")
    assert "parameters.txt" not in vens.keys()
    vens.remove_data("bogus")  # This should only give warning

    # Adding data. It should(?) work also for earlier nonexisting
    betterdata = pd.DataFrame(
        {
            "REAL": [0, 1, 2, 3, 4, 5, 6, 80],
            "NPV": [1000, 2000, 1500, 2300, 6000, 3000, 800, 9],
        }
    )
    vens.append("betterdata", betterdata)
    assert "betterdata" in vens.keys()
    for expected_column in ("REAL", "NPV"):
        assert expected_column in vens["betterdata"].columns

    for realidx, npv in ((3, 2300), (0, 1000), (1, 2000), (2, 1500), (80, 9)):
        assert vens.get_realization(realidx).get_df("betterdata")["NPV"] == npv

    with pytest.raises(ValueError):
        vens.get_realization(9999)

    assert vens.shortcut2path("betterdata") == "betterdata"

    def agg_npv(aggregation):
        """Aggregate the ensemble and return NPV from betterdata"""
        return vens.agg(aggregation).get_df("betterdata")["NPV"]

    assert agg_npv("min") == 9
    assert agg_npv("max") == 6000
    assert agg_npv("min") < agg_npv("p07")
    assert agg_npv("p05") < agg_npv("p55")
    assert agg_npv("p46") < agg_npv("max")

    assert "REAL" not in vens.agg("min")["STATUS"].columns

    # Betterdata should be returned as a dictionary
    # (it is returned from a virtualrealization object)
    assert isinstance(vens.agg("min").get_df("betterdata"), dict)
Beispiel #19
0
def test_todisk(tmpdir):
    """Test that we can write VirtualEnsembles to the filesystem in a
    retrievable manner

    Args:
        tmpdir: pytest fixture providing a temporary directory, which
            is made the working directory before anything is dumped.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0",
        manifest={"foo": "bar.com"},
    )
    reekensemble.load_smry(time_index="monthly", column_keys="*")
    reekensemble.load_smry(time_index="daily", column_keys="*")
    reekensemble.load_smry(time_index="yearly", column_keys="F*")
    reekensemble.load_scalar("npv.txt")
    reekensemble.load_txt("outputs.txt")
    vens = reekensemble.to_virtual()
    assert "foo" in vens.manifest

    # All dump directories below are relative to the temporary directory
    tmpdir.chdir()

    vens.to_disk("vens_dumped", delete=True)
    assert len(vens) == len(reekensemble)

    fromdisk = VirtualEnsemble(fromdisk="vens_dumped")
    assert "foo" in fromdisk.manifest

    # Same number of realizations:
    assert len(fromdisk) == len(vens)

    # Should have all the same keys,
    # but change of order is fine
    assert set(vens.keys()) == set(fromdisk.keys())

    for frame in vens.keys():
        if frame == "STATUS":
            continue

        assert (vens.get_df(frame).columns == fromdisk.get_df(frame).columns).all()

        # Columns that only contains NaN will not have their
        # type preserved, this is too much to ask for, especially
        # with CSV files. So we drop columns with NaN.
        # The axis must be given by keyword; newer pandas versions do
        # not accept it as a positional argument to dropna().
        virtframe = vens.get_df(frame).dropna(axis="columns")
        diskframe = fromdisk.get_df(frame).dropna(axis="columns")

        # It would be nice to be able to use pd.Dataframe.equals,
        # but it is too strict, as columns with mixed type number/strings
        # will easily be wrong.

        for column in set(virtframe.columns).intersection(set(diskframe.columns)):
            if object in (virtframe[column].dtype, diskframe[column].dtype):
                # Ensure we only compare strings when working with object dtype
                assert (
                    virtframe[column].astype(str).equals(diskframe[column].astype(str))
                )
            else:
                pd.testing.assert_series_equal(virtframe[column], diskframe[column])

    fromdisk.to_disk("vens_double_dumped", delete=True)
    # Here we could check filesystem equivalence if we want.

    # Dump without parquet files, and reload both eagerly and lazily:
    vens.to_disk("vens_dumped_csv", delete=True, dumpparquet=False)
    fromcsvdisk = VirtualEnsemble(fromdisk="vens_dumped_csv")
    lazyfromdisk = VirtualEnsemble(fromdisk="vens_dumped_csv", lazy_load=True)
    assert set(vens.keys()) == set(fromcsvdisk.keys())
    assert set(vens.keys()) == set(lazyfromdisk.keys())
    # A lazily loaded frame moves from lazy_frames to data once it
    # has been requested through get_df():
    assert "OK" in lazyfromdisk.lazy_frames.keys()
    assert "OK" not in lazyfromdisk.data.keys()
    assert len(fromcsvdisk.get_df("OK")) == len(lazyfromdisk.get_df("OK"))
    assert "OK" not in lazyfromdisk.lazy_frames.keys()
    assert "OK" in lazyfromdisk.data.keys()
    assert len(fromcsvdisk.parameters) == len(lazyfromdisk.parameters)
    assert len(fromcsvdisk.get_df("unsmry--yearly")) == len(
        lazyfromdisk.get_df("unsmry--yearly")
    )

    if HAVE_PYARROW:
        vens.to_disk("vens_dumped_parquet", delete=True, dumpcsv=False)
        fromparquetdisk = VirtualEnsemble()
        fromparquetdisk.from_disk("vens_dumped_parquet")
        assert set(vens.keys()) == set(fromparquetdisk.keys())

        fromparquetdisk2 = VirtualEnsemble()
        fromparquetdisk2.from_disk("vens_dumped_parquet", fmt="csv")
        # Here we will miss a lot of CSV files, because we only wrote parquet:
        assert len(vens.keys()) > len(fromparquetdisk2.keys())

        fromcsvdisk2 = VirtualEnsemble()
        fromcsvdisk2.from_disk("vens_dumped_csv", fmt="parquet")
        # But even if we only try to load parquet files, when CSV
        # files are found without corresponding parquet, the CSV file
        # will be read.
        assert set(vens.keys()) == set(fromcsvdisk2.keys())

    # Test manual intervention:
    # A frame without a REAL column is expected to be ignored on load.
    fooframe = pd.DataFrame(data=np.random.randn(3, 3), columns=["FOO", "BAR", "COM"])
    fooframe.to_csv(os.path.join("vens_dumped", "share/results/tables/randomdata.csv"))
    manualens = VirtualEnsemble(fromdisk="vens_dumped")
    assert "share/results/tables/randomdata.csv" not in manualens.keys()

    # Now with correct column header,
    # but floating point data for realizations..
    fooframe = pd.DataFrame(data=np.random.randn(3, 3), columns=["REAL", "BAR", "COM"])
    fooframe.to_csv(os.path.join("vens_dumped", "share/results/tables/randomdata.csv"))
    manualens = VirtualEnsemble(fromdisk="vens_dumped")
    assert "share/results/tables/randomdata.csv" not in manualens.keys()

    # Now with correct column header, and with integer data for REAL..
    fooframe = pd.DataFrame(
        data=np.random.randint(low=0, high=100, size=(3, 3)),
        columns=["REAL", "BAR", "COM"],
    )
    fooframe.to_csv(os.path.join("vens_dumped", "share/results/tables/randomdata.csv"))
    manualens = VirtualEnsemble(fromdisk="vens_dumped")
    assert "share/results/tables/randomdata.csv" in manualens.keys()
Beispiel #20
0
def test_vens_mismatch():
    """Check mismatch calculations against virtualized ensemble data"""
    # __file__ is undefined when this code is pasted into an interactive
    # session; use the current working directory in that case.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    realization_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/"
    )

    ens = ScratchEnsemble("test", realization_glob)
    ens.load_smry(column_keys=["FOPT*"], time_index="monthly")

    vens = ens.to_virtual()

    # No time_index is needed here, because monthly is all we have.
    obs = Observations({"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]})

    mismatch = obs.mismatch(vens)
    mismatch_raw = obs.mismatch(ens)
    assert isinstance(mismatch, pd.DataFrame)
    assert not mismatch.empty
    for expected_column in ("L1", "L2", "MISMATCH"):
        assert expected_column in mismatch.columns

    virtual_total = mismatch["MISMATCH"].sum()
    assert virtual_total != mismatch_raw["MISMATCH"].sum()

    # An explicitly monthly observation on the scratch ensemble should
    # reproduce the mismatch computed from the virtual ensemble
    monthly_unit = {"key": "FOPT", "histvec": "FOPTH", "time_index": "monthly"}
    obs_monthly = Observations({"smryh": [monthly_unit]})
    virtual_sorted = mismatch.sort_values("REAL").reset_index(drop=True)
    scratch_sorted = (
        obs_monthly.mismatch(ens).sort_values("REAL").reset_index(drop=True)
    )
    assert (virtual_sorted == scratch_sorted).all().all()

    # We should be able to do yearly smryh comparisons from virtualized
    # monthly profiles:
    yearly_unit = {"key": "FOPT", "histvec": "FOPTH", "time_index": "yearly"}
    obs_yearly = Observations({"smryh": [yearly_unit]})
    mismatch_yearly = obs_yearly.mismatch(vens)
    assert mismatch_yearly["MISMATCH"].sum() != mismatch["MISMATCH"].sum()

    # When load_smry() is forgotten before virtualization:
    vens = ScratchEnsemble("test", realization_glob).to_virtual()
    with pytest.raises(ValueError):
        obs.mismatch(vens)

    # Removal of one realization in the virtualized ensemble:
    ens = ScratchEnsemble("test", realization_glob)
    ens.load_smry(column_keys=["FOPT*"], time_index="monthly")
    vens = ens.to_virtual()
    vens.remove_realizations(2)
    mismatch_subset = obs.mismatch(vens)
    remaining_reals = mismatch_subset["REAL"].unique()
    assert 2 not in remaining_reals
    assert 0 in remaining_reals