예제 #1
0
def test_reek001_scalars():
    """Test import of scalar values from files

    Files with scalar values can contain numerics or strings,
    or be empty."""

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")

    assert "OK" in reekensemble.keys()
    assert isinstance(reekensemble.get_df("OK"), pd.DataFrame)
    assert len(reekensemble.get_df("OK")) == 5

    # One of the npv.txt files contains the string "error!"
    reekensemble.load_scalar("npv.txt")
    npv = reekensemble.get_df("npv.txt")
    assert isinstance(npv, pd.DataFrame)
    assert "REAL" in npv
    assert "npv.txt" in npv  # filename is the column name
    print(npv)
    assert len(npv) == 5
    assert npv.dtypes["REAL"] == int
    assert npv.dtypes["npv.txt"] == object
    # This is undesirable, can cause trouble with aggregation
    # Try again:
    reekensemble.load_scalar("npv.txt",
                             force_reread=True,
                             convert_numeric=True)
    npv = reekensemble.get_df("npv.txt")
    assert npv.dtypes["npv.txt"] == int or npv.dtypes["npv.txt"] == float
    assert len(npv) == 4  # the error should now be removed

    reekensemble.load_scalar("emptyscalarfile")  # missing in real-4
    assert len(reekensemble.get_df("emptyscalarfile")) == 4
    assert "emptyscalarfile" in reekensemble.keys()
    # Use when filter is merged.
    # assert len(reekensemble.filter('emptyscalarfile', inplace=True)) == 4

    # If we try to read the empty files as numerical values, we should get
    # nothing back:
    with pytest.raises(ValueError):
        reekensemble.load_scalar("emptyscalarfile",
                                 force_reread=True,
                                 convert_numeric=True)

    with pytest.raises(ValueError):
        reekensemble.load_scalar("nonexistingfile")
예제 #2
0
def test_noautodiscovery():
    """Test that we have full control over auto-discovery of UNSMRY files"""

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")
    # Default ensemble construction will include auto-discovery, check
    # that we got that:
    assert not reekensemble.get_smry(column_keys="FOPT").empty
    assert "UNSMRY" in reekensemble.files["FILETYPE"].values

    # Now try again, with no autodiscovery
    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0",
        autodiscovery=False,
    )
    assert reekensemble.get_smry(column_keys="FOPT").empty
    reekensemble.find_files("eclipse/model/*UNSMRY")
    assert not reekensemble.get_smry(column_keys="FOPT").empty

    # Some very basic data is discovered even though we have autodiscovery=False
    assert "parameters.txt" in reekensemble.keys()
    assert "STATUS" in reekensemble.keys()

    # If these are unwanted, we can delete explicitly:
    reekensemble.remove_data("parameters.txt")
    reekensemble.remove_data(["STATUS"])
    assert "parameters.txt" not in reekensemble.keys()
    assert "STATUS" not in reekensemble.keys()
예제 #3
0
def test_ensemble_ecl():
    """Eclipse specific functionality"""

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")

    # Eclipse summary keys:
    assert len(reekensemble.get_smrykeys("FOPT")) == 1
    assert len(reekensemble.get_smrykeys("F*")) == 49
    assert len(reekensemble.get_smrykeys(["F*", "W*"])) == 49 + 280
    assert not reekensemble.get_smrykeys("BOGUS")

    # reading ensemble dataframe
    monthly = reekensemble.load_smry(time_index="monthly")

    monthly = reekensemble.load_smry(column_keys=["F*"], time_index="monthly")
    assert monthly.columns[0] == "REAL"  # Enforce order of columns.
    assert monthly.columns[1] == "DATE"
    assert len(monthly) == 190
    # Check that the result was cached in memory, not necessarily on disk..
    assert isinstance(reekensemble.get_df("unsmry--monthly.csv"), pd.DataFrame)

    assert len(reekensemble.keys()) == 4

    # When asking the ensemble for FOPR, we also get REAL as a column
    # in return. Note that the internal stored version will be
    # overwritten by each load_smry()
    assert len(reekensemble.load_smry(column_keys=["FOPR"]).columns) == 3
    assert len(reekensemble.load_smry(column_keys=["FOP*"]).columns) == 11
    assert len(
        reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).columns) == 12

    # Check that there is now a cached version with raw dates:
    assert isinstance(reekensemble.get_df("unsmry--raw.csv"), pd.DataFrame)
    # The columns are not similar, this is allowed!'

    # If you get 3205 here, it means that you are using the union of
    # raw dates from all realizations, which is not correct
    assert len(
        reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).index) == 1700

    # Date list handling:
    assert len(reekensemble.get_smry_dates(freq="report")) == 641
    assert len(reekensemble.get_smry_dates(freq="raw")) == 641
    assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
    assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
    assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
    assert len(reekensemble.get_smry_dates(freq="last")) == 1
    assert reekensemble.get_smry_dates(
        freq="last") == reekensemble.get_smry_dates(freq="last",
                                                    end_date="2050-02-01")

    assert str(reekensemble.get_smry_dates(
        freq="report")[-1]) == "2003-01-02 00:00:00"
    assert str(
        reekensemble.get_smry_dates(freq="raw")[-1]) == "2003-01-02 00:00:00"
    assert str(reekensemble.get_smry_dates(freq="yearly")[-1]) == "2004-01-01"
    assert str(reekensemble.get_smry_dates(freq="monthly")[-1]) == "2003-02-01"
    assert str(reekensemble.get_smry_dates(freq="daily")[-1]) == "2003-01-02"
    assert str(reekensemble.get_smry_dates(freq="last")[-1]) == "2003-01-02"

    assert (str(
        reekensemble.get_smry_dates(
            freq="daily", end_date="2002-03-03")[-1]) == "2002-03-03")
    assert (str(
        reekensemble.get_smry_dates(
            freq="daily", start_date="2002-03-03")[0]) == "2002-03-03")

    # Start and end outside of orig data and on the "wrong side"
    dates = reekensemble.get_smry_dates(end_date="1999-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "1999-03-03"

    dates = reekensemble.get_smry_dates(start_date="2099-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "2099-03-03"

    # Time interpolated dataframes with summary data:
    yearly = reekensemble.get_smry_dates(freq="yearly")
    assert len(reekensemble.load_smry(column_keys=["FOPT"],
                                      time_index=yearly)) == 25
    # NB: This is cached in unsmry-custom.csv, not unsmry--yearly!
    # This usage is discouraged. Use 'yearly' in such cases.

    # Check that we can shortcut get_smry_dates:
    assert len(
        reekensemble.load_smry(column_keys=["FOPT"],
                               time_index="yearly")) == 25

    assert len(reekensemble.load_smry(column_keys=["FOPR"],
                                      time_index="last")) == 5
    assert isinstance(reekensemble.get_df("unsmry--last.csv"), pd.DataFrame)

    # Eclipse well names list
    assert len(reekensemble.get_wellnames("OP*")) == 5
    assert len(reekensemble.get_wellnames(None)) == 8
    assert len(reekensemble.get_wellnames()) == 8
    assert not reekensemble.get_wellnames("")
    assert len(reekensemble.get_wellnames(["OP*", "WI*"])) == 8

    # eclipse well groups list
    assert len(reekensemble.get_groupnames()) == 3

    # delta between two ensembles
    diff = reekensemble - reekensemble
    assert len(
        diff.get_smry(column_keys=["FOPR", "FGPR", "FWCT"]).columns) == 5

    # eclipse summary vector statistics for a given ensemble
    df_stats = reekensemble.get_smry_stats(column_keys=["FOPR", "FGPR"],
                                           time_index="monthly")
    assert isinstance(df_stats, pd.DataFrame)
    assert len(df_stats.columns) == 2
    assert isinstance(df_stats["FOPR"]["mean"], pd.Series)
    assert len(df_stats["FOPR"]["mean"].index) == 38

    # check if wild cards also work for get_smry_stats
    df_stats = reekensemble.get_smry_stats(column_keys=["FOP*", "FGP*"],
                                           time_index="monthly")
    assert len(df_stats.columns) == len(
        reekensemble.get_smrykeys(["FOP*", "FGP*"]))

    # Check webviz requirements for dataframe
    stats = df_stats.index.levels[0]
    assert "minimum" in stats
    assert "maximum" in stats
    assert "p10" in stats
    assert "p90" in stats
    assert "mean" in stats
    assert df_stats["FOPR"]["minimum"].iloc[-2] < df_stats["FOPR"][
        "maximum"].iloc[-2]

    # Check user supplied quantiles
    df_stats = reekensemble.get_smry_stats(column_keys=["FOPT"],
                                           time_index="yearly",
                                           quantiles=[0, 15, 50, 85, 100])
    statistics = df_stats.index.levels[0]
    assert "p0" in statistics
    assert "p15" in statistics
    assert "p50" in statistics
    assert "p85" in statistics
    assert "p100" in statistics

    # For oil industry, p15 on FOPT should yield a larger value than p85.
    # But the quantiles we get out follows the rest of the world
    # so we check for the opposite.
    assert df_stats["FOPT"]["p85"][-1] > df_stats["FOPT"]["p15"][-1]

    with pytest.raises(ValueError):
        reekensemble.get_smry_stats(column_keys=["FOPT"],
                                    time_index="yearly",
                                    quantiles=["foobar"])

    noquantiles = reekensemble.get_smry_stats(column_keys=["FOPT"],
                                              time_index="yearly",
                                              quantiles=[])
    assert len(noquantiles.index.levels[0]) == 3
예제 #4
0
def test_reek001(tmp="TMP"):
    """Test import of a stripped 5 realization ensemble"""

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")
    assert isinstance(reekensemble, ScratchEnsemble)
    assert reekensemble.name == "reektest"
    assert len(reekensemble) == 5

    assert isinstance(reekensemble[0], ScratchRealization)

    assert len(
        reekensemble.files[reekensemble.files.LOCALPATH == "jobs.json"]) == 5
    assert (len(reekensemble.files[reekensemble.files.LOCALPATH ==
                                   "parameters.txt"]) == 5)
    assert len(
        reekensemble.files[reekensemble.files.LOCALPATH == "STATUS"]) == 5

    statusdf = reekensemble.get_df("STATUS")
    assert len(statusdf) == 250  # 5 realizations, 50 jobs in each
    assert "REAL" in statusdf.columns
    assert "FORWARD_MODEL" in statusdf.columns
    statusdf = statusdf.set_index(["REAL", "FORWARD_MODEL"]).sort_index()
    assert "DURATION" in statusdf.columns  # calculated
    assert "argList" in statusdf.columns  # from jobs.json

    # Sample check the duration for RMS in realization 4:
    assert int(statusdf.loc[4, "RMS_BATCH"]["DURATION"].values[0]) == 195

    # STATUS in real4 is modified to simulate that Eclipse never finished:
    assert numpy.isnan(statusdf.loc[4,
                                    "ECLIPSE100_2014.2"]["DURATION"].values[0])

    if not os.path.exists(tmp):
        os.mkdir(tmp)
    statusdf.to_csv(os.path.join(tmp, "status.csv"), index=False)

    # Parameters.txt
    paramsdf = reekensemble.load_txt("parameters.txt")
    assert len(paramsdf) == 5  # 5 realizations
    paramsdf = reekensemble.parameters  # also test as property
    paramsdf = reekensemble.get_df("parameters.txt")
    assert len(paramsdf) == 5
    assert len(paramsdf.columns) == 26  # 25 parameters, + REAL column
    paramsdf.to_csv(os.path.join(tmp, "params.csv"), index=False)

    # Check that the ensemble object has not tainted the realization dataframe:
    assert "REAL" not in reekensemble._realizations[0].get_df("parameters.txt")

    # The column FOO in parameters is only present in some, and
    # is present with NaN in real0:
    assert "FOO" in reekensemble.parameters.columns
    assert len(reekensemble.parameters["FOO"].dropna()) == 1
    # (NaN ine one real, and non-existing in the others is the same thing)

    # Test loading of another txt file:
    reekensemble.load_txt("outputs.txt")
    assert "NPV" in reekensemble.load_txt("outputs.txt").columns
    # Check implicit discovery
    assert "outputs.txt" in reekensemble.files["LOCALPATH"].values
    assert all([os.path.isabs(x) for x in reekensemble.files["FULLPATH"]])

    # File discovery:
    csvvolfiles = reekensemble.find_files("share/results/volumes/*csv",
                                          metadata={"GRID": "simgrid"})
    assert isinstance(csvvolfiles, pd.DataFrame)
    assert "REAL" in csvvolfiles
    assert "FULLPATH" in csvvolfiles
    assert "LOCALPATH" in csvvolfiles
    assert "BASENAME" in csvvolfiles
    # Check the explicit metadata:
    assert "GRID" in csvvolfiles
    assert csvvolfiles["GRID"].unique() == ["simgrid"]

    reekensemble.files.to_csv(os.path.join(tmp, "files.csv"), index=False)

    # Check that rediscovery does not mess things up:

    filecount = len(reekensemble.files)
    newfiles = reekensemble.find_files("share/results/volumes/*csv")
    # Also note that we skipped metadata here in rediscovery:

    assert len(reekensemble.files) == filecount
    assert len(newfiles) == len(csvvolfiles)

    # The last invocation of find_files() should not return the metadata
    assert len(newfiles.columns) + 1 == len(csvvolfiles.columns)

    # FULLPATH should always contain absolute paths
    assert all([os.path.isabs(x) for x in reekensemble.files["FULLPATH"]])

    # The metadata in the rediscovered files should have been removed
    assert len(
        reekensemble.files[reekensemble.files["GRID"] == "simgrid"]) == 0

    # CSV files
    csvpath = "share/results/volumes/simulator_volume_fipnum.csv"
    vol_df = reekensemble.load_csv(csvpath)

    # Check that we have not tainted the realization dataframes:
    assert "REAL" not in reekensemble._realizations[0].get_df(csvpath)

    assert "REAL" in vol_df
    assert len(vol_df["REAL"].unique()) == 3  # missing in 2 reals
    vol_df.to_csv(os.path.join(tmp, "simulatorvolumes.csv"), index=False)

    # Test retrival of cached data
    vol_df2 = reekensemble.get_df(csvpath)

    assert "REAL" in vol_df2
    assert len(vol_df2["REAL"].unique()) == 3  # missing in 2 reals

    # Realization deletion:
    reekensemble.remove_realizations([1, 3])
    assert len(reekensemble) == 3

    # Readd the same realizations
    reekensemble.add_realizations([
        testdir + "/data/testensemble-reek001/" + "realization-1/iter-0",
        testdir + "/data/testensemble-reek001/" + "realization-3/iter-0",
    ])
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24

    # File discovery must be repeated for the newly added realizations
    reekensemble.find_files(
        "share/results/volumes/" + "simulator_volume_fipnum.csv",
        metadata={"GRID": "simgrid"},
    )
    assert len(reekensemble.files) == 25
    # Test addition of already added realization:
    reekensemble.add_realizations(testdir + "/data/testensemble-reek001/" +
                                  "realization-1/iter-0")
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24  # discovered files are lost!

    keycount = len(reekensemble.keys())
    reekensemble.remove_data("parameters.txt")
    assert len(reekensemble.keys()) == keycount - 1