Ejemplo n.º 1
0
def test_ensembleset_reek001(tmp="TMP"):
    """Test import of a stripped 5 realization ensemble,
    manually doubled to two identical ensembles.

    Every ``iter-0`` directory in the test data gets a sibling ``iter-1``
    symlink so that two ensembles can be loaded. The symlinks, and the CSV
    files dumped while testing ``apply()``, are removed in a ``finally``
    clause so that a failing assertion does not leave them behind in the
    test data directory.

    Args:
        tmp: Directory receiving the CSV files written by this test. It is
            created if it does not exist.
    """

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    ensdir = os.path.join(testdir, "data/testensemble-reek001/")
    realizationdirs = glob.glob(ensdir + "/realization-*")

    def remove_dumped_csv():
        """Delete the CSV files written by apply(..., dumptodisk=True)"""
        for real_dir in realizationdirs:
            csvfile = (
                real_dir + "/iter-0/share/results/volumes/geogrid--oil.csv"
            )
            if os.path.exists(csvfile):
                os.remove(csvfile)

    try:
        # Copy iter-0 to iter-1, creating an identical ensemble
        # we can load for testing.
        for realizationdir in realizationdirs:
            iter1_link = realizationdir + "/iter-1"
            # Test islink() first: exists() follows symlinks and is False
            # for a dangling one, which would make os.symlink() below fail.
            if os.path.islink(iter1_link):
                os.remove(iter1_link)
            elif os.path.exists(iter1_link):
                shutil.rmtree(iter1_link)
            os.symlink(realizationdir + "/iter-0", iter1_link)

        iter0 = ScratchEnsemble("iter-0", ensdir + "/realization-*/iter-0")
        iter1 = ScratchEnsemble("iter-1", ensdir + "/realization-*/iter-1")

        ensset = EnsembleSet("reek001", [iter0, iter1])
        assert len(ensset) == 2
        assert len(ensset["iter-0"].get_df("STATUS")) == 250
        assert len(ensset["iter-1"].get_df("STATUS")) == 250

        # Try adding the same object over again. A ValueError is
        # tolerated; what matters is that the set does not grow.
        try:
            ensset.add_ensemble(iter0)
        except ValueError:
            pass
        assert len(ensset) == 2  # Unchanged!

        # Initializing nothing, we get warning about the missing name
        noname = EnsembleSet()
        assert noname.name  # not None
        assert isinstance(noname.name, str)  # And it should be a string

        # Initialize starting from empty ensemble
        ensset2 = EnsembleSet("reek001", [])
        assert ensset2.name == "reek001"
        ensset2.add_ensemble(iter0)
        ensset2.add_ensemble(iter1)
        assert len(ensset2) == 2

        # Check that we can skip the empty list:
        ensset2x = EnsembleSet("reek001")
        ensset2x.add_ensemble(iter0)
        ensset2x.add_ensemble(iter1)
        assert len(ensset2x) == 2

        # Initialize directly from path with globbing:
        ensset3 = EnsembleSet("reek001direct", [])
        assert ensset3.name == "reek001direct"
        ensset3.add_ensembles_frompath(ensdir)
        assert len(ensset3) == 2

        # Alternative globbing:
        ensset4 = EnsembleSet("reek001direct2", frompath=ensdir)
        assert len(ensset4) == 2

        # Testing aggregation of parameters
        paramsdf = ensset3.parameters
        if not os.path.exists(tmp):
            os.mkdir(tmp)
        paramsdf.to_csv(os.path.join(tmp, "enssetparams.csv"), index=False)
        assert isinstance(paramsdf, pd.DataFrame)
        assert len(ensset3.parameters) == 10
        assert len(ensset3.parameters.columns) == 27
        assert "ENSEMBLE" in ensset3.parameters.columns
        assert "REAL" in ensset3.parameters.columns

        outputs = ensset3.load_txt("outputs.txt")
        assert "NPV" in outputs.columns

        # Test Eclipse summary handling:
        assert len(ensset3.get_smry_dates(freq="report")) == 641
        assert len(ensset3.get_smry_dates(freq="monthly")) == 37
        assert len(ensset3.load_smry(column_keys=["FOPT"],
                                     time_index="yearly")) == 50
        monthly = ensset3.load_smry(column_keys=["F*"], time_index="monthly")
        assert monthly.columns[0] == "ENSEMBLE"
        assert monthly.columns[1] == "REAL"
        assert monthly.columns[2] == "DATE"

        # Eclipse well names
        assert len(ensset3.get_wellnames("OP*")) == 5
        assert len(ensset3.get_wellnames("WI*")) == 3
        assert not ensset3.get_wellnames("")
        assert len(ensset3.get_wellnames()) == 8

        # Check that we can retrieve cached versions
        assert len(ensset3.get_df("unsmry--monthly")) == 380
        assert len(ensset3.get_df("unsmry--yearly")) == 50
        monthly.to_csv(os.path.join(tmp, "ensset-monthly.csv"), index=False)

        with pytest.raises(ValueError):
            ensset3.get_df("unsmry--weekly")

        # Check errors when we ask for stupid stuff
        with pytest.raises(ValueError):
            ensset3.load_csv("bogus.csv")
        with pytest.raises(ValueError):
            ensset3.get_df("bogus.csv")

        # Check get_smry()
        smry = ensset3.get_smry(time_index="yearly",
                                column_keys=["FWCT", "FGOR"],
                                end_date="2002-02-01")
        assert "ENSEMBLE" in smry
        assert "REAL" in smry
        assert len(smry["ENSEMBLE"].unique()) == 2
        assert len(smry["REAL"].unique()) == 5
        assert "FWCT" in smry
        assert "FGOR" in smry
        assert "DATE" in smry
        assert len(smry) == 40

        # Eclipse well names list
        assert len(ensset3.get_wellnames("OP*")) == 5
        assert len(ensset3.get_wellnames(None)) == 8
        assert len(ensset3.get_wellnames()) == 8
        assert not ensset3.get_wellnames("")
        assert len(ensset3.get_wellnames(["OP*", "WI*"])) == 8

        # Test aggregation of csv files:
        vol_df = ensset3.load_csv("share/results/volumes/" +
                                  "simulator_volume_fipnum.csv")
        assert "REAL" in vol_df
        assert "ENSEMBLE" in vol_df
        assert len(vol_df["REAL"].unique()) == 3
        assert len(vol_df["ENSEMBLE"].unique()) == 2
        assert len(ensset3.keys()) == 7

        # Test scalar imports:
        ensset3.load_scalar("npv.txt")
        npv = ensset3.get_df("npv.txt")
        assert "ENSEMBLE" in npv
        assert "REAL" in npv
        assert "npv.txt" in npv
        assert len(npv) == 10
        # Scalar import with forced numerics:
        ensset3.load_scalar("npv.txt", convert_numeric=True, force_reread=True)
        npv = ensset3.get_df("npv.txt")
        assert len(npv) == 8

        predel_len = len(ensset3.keys())
        ensset3.drop("parameters.txt")
        assert len(ensset3.keys()) == predel_len - 1

        # Test callback functionality, that we can convert rms
        # volumetrics in each realization. First we need a
        # wrapper which is able to work on ScratchRealizations.
        def rms_vol2df(kwargs):
            """Callback function to be sent to ensemble objects"""
            fullpath = os.path.join(kwargs["realization"].runpath(),
                                    kwargs["filename"])
            # The supplied callback should not fail too easy.
            if os.path.exists(fullpath):
                return volumetrics.rmsvolumetrics_txt2df(fullpath)
            return pd.DataFrame()

        if not SKIP_FMU_TOOLS:
            rmsvols_df = ensset3.apply(rms_vol2df,
                                       filename="share/results/volumes/" +
                                       "geogrid_vol_oil_1.txt")
            assert rmsvols_df["STOIIP_OIL"].sum() > 0
            assert len(rmsvols_df["REAL"].unique()) == 4
            assert len(rmsvols_df["ENSEMBLE"].unique()) == 2

            # Test that we can dump to disk as well and load from csv:
            ensset3.apply(
                rms_vol2df,
                filename="share/results/volumes/" + "geogrid_vol_oil_1.txt",
                localpath="share/results/volumes/geogrid--oil.csv",
                dumptodisk=True,
            )
            geogrid_oil = ensset3.load_csv(
                "share/results/volumes/geogrid--oil.csv")
            assert len(geogrid_oil["REAL"].unique()) == 4
            assert len(geogrid_oil["ENSEMBLE"].unique()) == 2
            # Clean up what we just dumped:
            remove_dumped_csv()

        # Initialize differently, using only the root path containing
        # realization-*
        ensset4 = EnsembleSet("foo", frompath=ensdir)
        assert len(ensset4) == 2
        assert isinstance(ensset4["iter-0"], ScratchEnsemble)
        assert isinstance(ensset4["iter-1"], ScratchEnsemble)
    finally:
        # Delete the symlinks and leftovers from apply-testing when we are
        # done, also when one of the assertions above failed.
        if not SKIP_FMU_TOOLS:
            remove_dumped_csv()
        for real_dir in realizationdirs:
            # islink() also catches a dangling link, which exists() misses.
            if os.path.islink(real_dir + "/iter-1"):
                os.remove(real_dir + "/iter-1")
Ejemplo n.º 2
0
def test_ensembleset_reek001(tmpdir):
    """Test import of a stripped 5 realization ensemble,
    manually doubled to two identical ensembles.

    The test changes the working directory to ``tmpdir`` and loads both
    ensembles from there, so CSV files are written with relative paths.

    Args:
        tmpdir: Temporary directory object providing ``chdir()`` and
            ``join()`` (presumably the pytest ``tmpdir`` fixture; confirm
            against the test configuration).
    """

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    ensdir = os.path.join(testdir, "data/testensemble-reek001/")

    # Work inside tmpdir; symlink_iter() is defined elsewhere and is
    # presumably what makes realization-*/iter-N appear below tmpdir.
    tmpdir.chdir()
    symlink_iter(ensdir, "iter-0")
    symlink_iter(ensdir, "iter-1")

    iter0 = ScratchEnsemble("iter-0", str(tmpdir.join("realization-*/iter-0")))
    iter1 = ScratchEnsemble("iter-1", str(tmpdir.join("realization-*/iter-1")))

    ensset = EnsembleSet("reek001", [iter0, iter1])

    assert len(ensset) == 2
    assert len(ensset["iter-0"].get_df("STATUS")) == 250
    assert len(ensset["iter-1"].get_df("STATUS")) == 250

    # Try adding the same object over again. A ValueError is tolerated;
    # what matters is that the set does not grow.
    try:
        ensset.add_ensemble(iter0)
    except ValueError:
        pass
    assert len(ensset) == 2  # Unchanged!

    # Initializing nothing, we get warning about the missing name
    noname = EnsembleSet()
    assert noname.name  # not None
    assert isinstance(noname.name, str)  # And it should be a string

    # Initialize starting from empty ensemble
    ensset2 = EnsembleSet("reek001", [])
    assert ensset2.name == "reek001"
    ensset2.add_ensemble(iter0)
    ensset2.add_ensemble(iter1)
    assert len(ensset2) == 2

    # Check that we can skip the empty list:
    ensset2x = EnsembleSet("reek001")
    ensset2x.add_ensemble(iter0)
    ensset2x.add_ensemble(iter1)
    assert len(ensset2x) == 2

    # Initialize directly from path with globbing:
    ensset3 = EnsembleSet("reek001direct", [])
    assert ensset3.name == "reek001direct"
    ensset3.add_ensembles_frompath(".")
    assert len(ensset3) == 2

    # Alternative globbing:
    ensset4 = EnsembleSet("reek001direct2", frompath=".")
    assert len(ensset4) == 2

    # Testing aggregation of parameters
    paramsdf = ensset3.parameters
    paramsdf.to_csv("enssetparams.csv", index=False)
    assert isinstance(paramsdf, pd.DataFrame)
    assert len(ensset3.parameters) == 10
    assert len(ensset3.parameters.columns) == 27
    assert "ENSEMBLE" in ensset3.parameters.columns
    assert "REAL" in ensset3.parameters.columns

    outputs = ensset3.load_txt("outputs.txt")
    assert "NPV" in outputs.columns

    # Test Eclipse summary handling:
    assert len(ensset3.get_smry_dates(freq="report")) == 641
    assert len(ensset3.get_smry_dates(freq="monthly")) == 37
    assert len(ensset3.load_smry(column_keys=["FOPT"],
                                 time_index="yearly")) == 50
    monthly = ensset3.load_smry(column_keys=["F*"], time_index="monthly")
    assert monthly.columns[0] == "ENSEMBLE"
    assert monthly.columns[1] == "REAL"
    assert monthly.columns[2] == "DATE"
    # Omitting time_index, or passing None, should give the same values
    # as an explicit time_index="raw".
    raw = ensset3.load_smry(column_keys=["F*PT"], time_index="raw")
    assert ensset3.load_smry(column_keys=["F*PT"]).iloc[3, 4] == raw.iloc[3, 4]
    assert (ensset3.load_smry(column_keys=["F*PT"],
                              time_index=None).iloc[3, 5] == raw.iloc[3, 5])

    # Eclipse well names
    assert len(ensset3.get_wellnames("OP*")) == 5
    assert len(ensset3.get_wellnames("WI*")) == 3
    assert not ensset3.get_wellnames("")
    assert len(ensset3.get_wellnames()) == 8

    # Check that we can retrieve cached versions
    assert len(ensset3.get_df("unsmry--monthly")) == 380
    assert len(ensset3.get_df("unsmry--yearly")) == 50
    monthly.to_csv("ensset-monthly.csv", index=False)

    # Test merging in get_df()
    param_output = ensset3.get_df("parameters.txt", merge="outputs")
    assert "top_structure" in param_output
    assert "SORG1" in param_output

    smry_params = ensset3.get_df("unsmry--monthly", merge="parameters")
    assert "SORG1" in smry_params
    assert "FWCT" in smry_params

    # Merging with something that does not exist:
    with pytest.raises(KeyError):
        ensset3.get_df("unsmry--monthly", merge="barrFF")

    with pytest.raises((KeyError, ValueError)):
        ensset3.get_df("unsmry--weekly")

    # Check errors when we ask for stupid stuff
    with pytest.raises((KeyError, ValueError)):
        ensset3.load_csv("bogus.csv")
    with pytest.raises((KeyError, ValueError)):
        ensset3.get_df("bogus.csv")

    # Check get_smry()
    smry = ensset3.get_smry(time_index="yearly",
                            column_keys=["FWCT", "FGOR"],
                            end_date="2002-02-01")
    assert "ENSEMBLE" in smry
    assert "REAL" in smry
    assert len(smry["ENSEMBLE"].unique()) == 2
    assert len(smry["REAL"].unique()) == 5
    assert "FWCT" in smry
    assert "FGOR" in smry
    assert "DATE" in smry
    assert len(smry) == 40

    # Eclipse well names list
    assert len(ensset3.get_wellnames("OP*")) == 5
    assert len(ensset3.get_wellnames(None)) == 8
    assert len(ensset3.get_wellnames()) == 8
    assert not ensset3.get_wellnames("")
    assert len(ensset3.get_wellnames(["OP*", "WI*"])) == 8

    # Test aggregation of csv files:
    vol_df = ensset3.load_csv("share/results/volumes/" +
                              "simulator_volume_fipnum.csv")
    assert "REAL" in vol_df
    assert "ENSEMBLE" in vol_df
    assert len(vol_df["REAL"].unique()) == 3
    assert len(vol_df["ENSEMBLE"].unique()) == 2
    assert len(ensset3.keys()) == 8

    # Test scalar imports:
    ensset3.load_scalar("npv.txt")
    npv = ensset3.get_df("npv.txt")
    assert "ENSEMBLE" in npv
    assert "REAL" in npv
    assert "npv.txt" in npv
    assert len(npv) == 10
    # Scalar import with forced numerics:
    ensset3.load_scalar("npv.txt", convert_numeric=True, force_reread=True)
    npv = ensset3.get_df("npv.txt")
    assert len(npv) == 8

    predel_len = len(ensset3.keys())
    ensset3.drop("parameters.txt")
    assert len(ensset3.keys()) == predel_len - 1

    # Test callback functionality, that we can convert rms
    # volumetrics in each realization. First we need a
    # wrapper which is able to work on ScratchRealizations.
    def rms_vol2df(kwargs):
        """Callback function to be sent to ensemble objects"""
        fullpath = os.path.join(kwargs["realization"].runpath(),
                                kwargs["filename"])
        # The supplied callback should not fail too easy.
        if os.path.exists(fullpath):
            return volumetrics.rmsvolumetrics_txt2df(fullpath)
        return pd.DataFrame()

    if not SKIP_FMU_TOOLS:
        rmsvols_df = ensset3.apply(rms_vol2df,
                                   filename="share/results/volumes/" +
                                   "geogrid_vol_oil_1.txt")
        assert rmsvols_df["STOIIP_OIL"].sum() > 0
        assert len(rmsvols_df["REAL"].unique()) == 4
        assert len(rmsvols_df["ENSEMBLE"].unique()) == 2

        # Test that we can dump to disk as well and load from csv:
        ensset3.apply(
            rms_vol2df,
            filename="share/results/volumes/" + "geogrid_vol_oil_1.txt",
            localpath="share/results/volumes/geogrid--oil.csv",
            dumptodisk=True,
        )
        geogrid_oil = ensset3.load_csv(
            "share/results/volumes/geogrid--oil.csv")
        assert len(geogrid_oil["REAL"].unique()) == 4
        assert len(geogrid_oil["ENSEMBLE"].unique()) == 2

    # Initialize differently, using only the root path containing
    # realization-*
    ensset4 = EnsembleSet("foo", frompath=".")
    assert len(ensset4) == 2
    assert isinstance(ensset4["iter-0"], ScratchEnsemble)
    assert isinstance(ensset4["iter-1"], ScratchEnsemble)

    def batch_commands():
        """Return a fresh copy of the batch commands used below"""
        return [
            {
                "load_scalar": {
                    "localpath": "npv.txt"
                }
            },
            {
                "load_smry": {
                    "column_keys": "FOPT",
                    "time_index": "yearly"
                }
            },
            {
                "load_smry": {
                    "column_keys": "*",
                    "time_index": "daily"
                }
            },
        ]

    # Try the batch command feature:
    ensset5 = EnsembleSet(
        "reek001",
        frompath=".",
        batch=batch_commands(),
    )
    assert len(ensset5.get_df("npv.txt")) == 10
    assert len(ensset5.get_df("unsmry--yearly")) == 50
    assert len(ensset5.get_df("unsmry--daily")) == 10980

    # Try batch processing after initialization:
    ensset6 = EnsembleSet("reek001", frompath=".")
    ensset6.process_batch(batch=batch_commands())
    # Inspect ensset6 (not ensset5) so that process_batch() on an already
    # initialized set is what actually gets verified.
    assert len(ensset6.get_df("npv.txt")) == 10
    assert len(ensset6.get_df("unsmry--yearly")) == 50
    assert len(ensset6.get_df("unsmry--daily")) == 10980