def read_well_connection_status(
    ensemble_path: str, well_connection_status_file: str
) -> Optional[pd.DataFrame]:
    """Read well connection status csv files for all realizations.

    fmu-ensemble is used to locate the file in every realization on the
    scratch disk. The per-realization files are merged into one dataframe,
    tagged with a ``REAL`` column, and some column data types are fixed.

    The well connection status data is extracted from the CPI data, which is
    0 if the connection is SHUT and >0 if the connection is OPEN. This is
    independent of the status of the well.

    Args:
        ensemble_path: Path (pattern) passed on to ``ScratchEnsemble``.
        well_connection_status_file: File path (pattern) handed to
            ``ScratchEnsemble.find_files``.

    Returns:
        The merged dataframe, with ``I``, ``J`` and ``K1`` as numeric columns
        (``K1`` replaces the ``K`` column of the csv files) and ``DATE`` as
        ``datetime.date`` objects; ``None`` if no file was found in any
        realization.
    """
    ens = ScratchEnsemble("ens", ensemble_path)
    df_files = ens.find_files(well_connection_status_file)

    if df_files.empty:
        return None

    # Collect all realizations first and concatenate once. Concatenating
    # inside the loop copies the accumulated data on every iteration.
    frames = []
    for _, row in df_files.iterrows():
        df_real = pd.read_csv(row.FULLPATH)
        df_real["REAL"] = row.REAL
        frames.append(df_real)
    df = pd.concat(frames)

    df["I"] = pd.to_numeric(df["I"])
    df["J"] = pd.to_numeric(df["J"])
    # The K index is exposed under the name K1 in the returned dataframe
    df["K1"] = pd.to_numeric(df["K"])
    df = df.drop(columns=["K"])
    df["DATE"] = pd.to_datetime(df["DATE"]).dt.date
    return df
Exemple #2
0
def test_ens_failedreals():
    """Check that mismatch can be calculated when some realizations
    lack UNSMRY data"""
    # Outside of a file (e.g. pasted into an interactive session) there is
    # no __file__, so fall back to the current working directory
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))
    ens_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/"

    ens = ScratchEnsemble("test", ens_glob, autodiscovery=False)
    obs = Observations({"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]})

    # Without autodiscovery no UNSMRY is known yet, so nothing to compare:
    first_mismatch = obs.mismatch(ens)
    assert first_mismatch.empty

    # After explicit discovery the mismatch is populated
    ens.find_files("eclipse/model/*UNSMRY")
    assert not obs.mismatch(ens).empty

    # Start over with a fresh ensemble object
    ens = ScratchEnsemble("test", ens_glob, autodiscovery=False)
    ens.find_files("eclipse/model/*UNSMRY")

    # Point the UNSMRY entry of realization 3 to a non-existing file
    real3files = ens[3].files
    is_unsmry = real3files["FILETYPE"] == "UNSMRY"
    real3files.loc[is_unsmry, "FULLPATH"] = "FOO"

    # EclSum should be available for realization 2, but not for 3:
    assert ens[2].get_eclsum()
    assert not ens[3].get_eclsum()

    # Realization 3 must be absent from the mismatch now
    missingsmry = obs.mismatch(ens)
    assert 3 not in missingsmry["REAL"].tolist()
    assert not obs.mismatch(ens).empty
Exemple #3
0
def test_noautodiscovery():
    """Verify that auto-discovery of UNSMRY files can be fully controlled"""
    # Outside of a file (e.g. pasted into an interactive session) there is
    # no __file__, so fall back to the current working directory
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))
    ens_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"

    # A default-constructed ensemble auto-discovers summary files:
    reekensemble = ScratchEnsemble("reektest", ens_glob)
    assert not reekensemble.get_smry(column_keys="FOPT").empty
    assert "UNSMRY" in reekensemble.files["FILETYPE"].values

    # With autodiscovery switched off, summary data only appears after
    # the files have been located explicitly
    reekensemble = ScratchEnsemble("reektest", ens_glob, autodiscovery=False)
    assert reekensemble.get_smry(column_keys="FOPT").empty
    reekensemble.find_files("eclipse/model/*UNSMRY")
    assert not reekensemble.get_smry(column_keys="FOPT").empty

    # A few basic datatypes are loaded regardless of autodiscovery=False
    for basic_key in ("parameters.txt", "STATUS"):
        assert basic_key in reekensemble.keys()

    # They can be removed explicitly if unwanted; remove_data() is called
    # with both a string and a list argument here
    reekensemble.remove_data("parameters.txt")
    reekensemble.remove_data(["STATUS"])
    for basic_key in ("parameters.txt", "STATUS"):
        assert basic_key not in reekensemble.keys()
Exemple #4
0
def test_reek001(tmp="TMP"):
    """Test import of a stripped 5 realization ensemble

    Args:
        tmp: Directory that receives csv dumps of the dataframes produced
            during the test. It is created if it does not exist.
    """

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")
    assert isinstance(reekensemble, ScratchEnsemble)
    assert reekensemble.name == "reektest"
    assert len(reekensemble) == 5

    assert isinstance(reekensemble[0], ScratchRealization)

    assert len(
        reekensemble.files[reekensemble.files.LOCALPATH == "jobs.json"]) == 5
    assert (len(reekensemble.files[reekensemble.files.LOCALPATH ==
                                   "parameters.txt"]) == 5)
    assert len(
        reekensemble.files[reekensemble.files.LOCALPATH == "STATUS"]) == 5

    statusdf = reekensemble.get_df("STATUS")
    assert len(statusdf) == 250  # 5 realizations, 50 jobs in each
    assert "REAL" in statusdf.columns
    assert "FORWARD_MODEL" in statusdf.columns
    statusdf = statusdf.set_index(["REAL", "FORWARD_MODEL"]).sort_index()
    assert "DURATION" in statusdf.columns  # calculated
    assert "argList" in statusdf.columns  # from jobs.json

    # Sample check the duration for RMS in realization 4:
    assert int(statusdf.loc[4, "RMS_BATCH"]["DURATION"].values[0]) == 195

    # STATUS in real4 is modified to simulate that Eclipse never finished:
    assert numpy.isnan(statusdf.loc[4,
                                    "ECLIPSE100_2014.2"]["DURATION"].values[0])

    # exist_ok avoids the check-then-create race of exists() + mkdir(),
    # and makedirs also copes with a nested output path
    os.makedirs(tmp, exist_ok=True)
    statusdf.to_csv(os.path.join(tmp, "status.csv"), index=False)

    # Parameters.txt
    paramsdf = reekensemble.load_txt("parameters.txt")
    assert len(paramsdf) == 5  # 5 realizations
    paramsdf = reekensemble.parameters  # also test as property
    paramsdf = reekensemble.get_df("parameters.txt")
    assert len(paramsdf) == 5
    assert len(paramsdf.columns) == 26  # 25 parameters, + REAL column
    paramsdf.to_csv(os.path.join(tmp, "params.csv"), index=False)

    # Check that the ensemble object has not tainted the realization dataframe:
    assert "REAL" not in reekensemble._realizations[0].get_df("parameters.txt")

    # The column FOO in parameters is only present in some, and
    # is present with NaN in real0:
    assert "FOO" in reekensemble.parameters.columns
    assert len(reekensemble.parameters["FOO"].dropna()) == 1
    # (NaN in one real, and non-existing in the others is the same thing)

    # Test loading of another txt file:
    reekensemble.load_txt("outputs.txt")
    assert "NPV" in reekensemble.load_txt("outputs.txt").columns
    # Check implicit discovery
    assert "outputs.txt" in reekensemble.files["LOCALPATH"].values
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # File discovery:
    csvvolfiles = reekensemble.find_files("share/results/volumes/*csv",
                                          metadata={"GRID": "simgrid"})
    assert isinstance(csvvolfiles, pd.DataFrame)
    assert "REAL" in csvvolfiles
    assert "FULLPATH" in csvvolfiles
    assert "LOCALPATH" in csvvolfiles
    assert "BASENAME" in csvvolfiles
    # Check the explicit metadata:
    assert "GRID" in csvvolfiles
    # Compare as a list: asserting on the elementwise array comparison would
    # raise ValueError instead of failing if there were several unique values
    assert list(csvvolfiles["GRID"].unique()) == ["simgrid"]

    reekensemble.files.to_csv(os.path.join(tmp, "files.csv"), index=False)

    # Check that rediscovery does not mess things up:

    filecount = len(reekensemble.files)
    newfiles = reekensemble.find_files("share/results/volumes/*csv")
    # Also note that we skipped metadata here in rediscovery:

    assert len(reekensemble.files) == filecount
    assert len(newfiles) == len(csvvolfiles)

    # The last invocation of find_files() should not return the metadata
    assert len(newfiles.columns) + 1 == len(csvvolfiles.columns)

    # FULLPATH should always contain absolute paths
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # The metadata in the rediscovered files should have been removed
    assert len(
        reekensemble.files[reekensemble.files["GRID"] == "simgrid"]) == 0

    # CSV files
    csvpath = "share/results/volumes/simulator_volume_fipnum.csv"
    vol_df = reekensemble.load_csv(csvpath)

    # Check that we have not tainted the realization dataframes:
    assert "REAL" not in reekensemble._realizations[0].get_df(csvpath)

    assert "REAL" in vol_df
    assert len(vol_df["REAL"].unique()) == 3  # missing in 2 reals
    vol_df.to_csv(os.path.join(tmp, "simulatorvolumes.csv"), index=False)

    # Test retrieval of cached data
    vol_df2 = reekensemble.get_df(csvpath)

    assert "REAL" in vol_df2
    assert len(vol_df2["REAL"].unique()) == 3  # missing in 2 reals

    # Realization deletion:
    reekensemble.remove_realizations([1, 3])
    assert len(reekensemble) == 3

    # Readd the same realizations
    reekensemble.add_realizations([
        testdir + "/data/testensemble-reek001/" + "realization-1/iter-0",
        testdir + "/data/testensemble-reek001/" + "realization-3/iter-0",
    ])
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24

    # File discovery must be repeated for the newly added realizations
    reekensemble.find_files(
        "share/results/volumes/" + "simulator_volume_fipnum.csv",
        metadata={"GRID": "simgrid"},
    )
    assert len(reekensemble.files) == 25
    # Test addition of already added realization:
    reekensemble.add_realizations(testdir + "/data/testensemble-reek001/" +
                                  "realization-1/iter-0")
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24  # discovered files are lost!

    keycount = len(reekensemble.keys())
    reekensemble.remove_data("parameters.txt")
    assert len(reekensemble.keys()) == keycount - 1