def test_emptyens():
    """Exercise a ScratchEnsemble that holds no realizations.

    Each summary-related getter is expected to hand back an empty
    container of the appropriate type (DataFrame, list or dict).
    Finally one realization is added by hand and the ensemble length
    is checked.
    """
    ens = ScratchEnsemble("emptyens")
    assert not ens

    # Without __file__ (e.g. when the code is pasted into an interactive
    # session) the test data is resolved relative to the working directory.
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    smry = ens.get_smry()
    assert isinstance(smry, pd.DataFrame)
    assert smry.empty

    dates = ens.get_smry_dates()
    assert isinstance(dates, list)
    assert not dates

    smrykeys = ens.get_smrykeys()
    assert isinstance(smrykeys, list)
    assert not smrykeys

    rates = ens.get_volumetric_rates()
    assert isinstance(rates, pd.DataFrame)
    assert rates.empty

    stats = ens.get_smry_stats()
    assert isinstance(stats, pd.DataFrame)
    assert stats.empty

    wells = ens.get_wellnames()
    assert isinstance(wells, list)
    assert not wells

    groups = ens.get_groupnames()
    assert isinstance(groups, list)
    assert not groups

    # Metadata lookup: first without any selector, then with a wildcard,
    # a single key and a list of keys.
    meta = ens.get_smry_meta()
    assert isinstance(meta, dict)
    assert not meta
    for selector in ("*", "FOPT", ["FOPT"]):
        meta = ens.get_smry_meta(selector)
        assert isinstance(meta, dict)
        assert not meta

    # Add a realization manually:
    realization_path = (
        testdir + "/data/testensemble-reek001/" + "realization-0/iter-0"
    )
    ens.add_realizations(realization_path)
    assert len(ens) == 1
def test_reek001(tmp="TMP"):
    """Test import of a stripped 5 realization ensemble.

    Walks through loading of STATUS, parameters.txt, outputs.txt and a
    CSV file, file discovery with and without metadata, and removal and
    re-addition of realizations.

    Args:
        tmp: Directory that receives CSV dumps of the dataframes produced
            along the way. It is created if it does not exist.
    """
    if "__file__" in globals():
        # Ease copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    # Root of the on-disk test ensemble, shared by all paths below.
    reekdir = testdir + "/data/testensemble-reek001/"

    reekensemble = ScratchEnsemble("reektest", reekdir + "realization-*/iter-0")
    assert isinstance(reekensemble, ScratchEnsemble)
    assert reekensemble.name == "reektest"
    assert len(reekensemble) == 5

    assert isinstance(reekensemble[0], ScratchRealization)

    # Each of these files must be registered once per realization.
    for localpath in ("jobs.json", "parameters.txt", "STATUS"):
        assert (
            len(reekensemble.files[reekensemble.files.LOCALPATH == localpath])
            == 5
        )

    statusdf = reekensemble.get_df("STATUS")
    assert len(statusdf) == 250  # 5 realizations, 50 jobs in each
    assert "REAL" in statusdf.columns
    assert "FORWARD_MODEL" in statusdf.columns
    statusdf = statusdf.set_index(["REAL", "FORWARD_MODEL"]).sort_index()
    assert "DURATION" in statusdf.columns  # calculated
    assert "argList" in statusdf.columns  # from jobs.json

    # Sample check the duration for RMS in realization 4:
    assert int(statusdf.loc[4, "RMS_BATCH"]["DURATION"].values[0]) == 195

    # STATUS in real4 is modified to simulate that Eclipse never finished:
    assert numpy.isnan(
        statusdf.loc[4, "ECLIPSE100_2014.2"]["DURATION"].values[0]
    )

    if not os.path.exists(tmp):
        os.mkdir(tmp)
    # NOTE(review): REAL and FORWARD_MODEL are index levels at this point,
    # so index=False leaves them out of the dump -- confirm this is intended.
    statusdf.to_csv(os.path.join(tmp, "status.csv"), index=False)

    # Parameters.txt
    paramsdf = reekensemble.load_txt("parameters.txt")
    assert len(paramsdf) == 5  # 5 realizations
    paramsdf = reekensemble.parameters  # also test as property
    paramsdf = reekensemble.get_df("parameters.txt")
    assert len(paramsdf) == 5
    assert len(paramsdf.columns) == 26  # 25 parameters, + REAL column
    paramsdf.to_csv(os.path.join(tmp, "params.csv"), index=False)

    # Check that the ensemble object has not tainted the realization dataframe:
    assert "REAL" not in reekensemble._realizations[0].get_df("parameters.txt")

    # The column FOO in parameters is only present in some, and
    # is present with NaN in real0:
    assert "FOO" in reekensemble.parameters.columns
    assert len(reekensemble.parameters["FOO"].dropna()) == 1
    # (NaN in one real, and non-existing in the others is the same thing)

    # Test loading of another txt file:
    reekensemble.load_txt("outputs.txt")
    assert "NPV" in reekensemble.load_txt("outputs.txt").columns
    # Check implicit discovery
    assert "outputs.txt" in reekensemble.files["LOCALPATH"].values
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # File discovery:
    csvvolfiles = reekensemble.find_files(
        "share/results/volumes/*csv", metadata={"GRID": "simgrid"}
    )
    assert isinstance(csvvolfiles, pd.DataFrame)
    assert "REAL" in csvvolfiles
    assert "FULLPATH" in csvvolfiles
    assert "LOCALPATH" in csvvolfiles
    assert "BASENAME" in csvvolfiles
    # Check the explicit metadata:
    assert "GRID" in csvvolfiles
    # Compare as a plain list: asserting directly on the elementwise array
    # comparison raises ValueError (instead of failing cleanly) as soon as
    # more than one distinct value is present.
    assert list(csvvolfiles["GRID"].unique()) == ["simgrid"]

    reekensemble.files.to_csv(os.path.join(tmp, "files.csv"), index=False)

    # Check that rediscovery does not mess things up:
    filecount = len(reekensemble.files)
    newfiles = reekensemble.find_files("share/results/volumes/*csv")
    # Also note that we skipped metadata here in rediscovery:

    assert len(reekensemble.files) == filecount
    assert len(newfiles) == len(csvvolfiles)

    # The last invocation of find_files() should not return the metadata
    assert len(newfiles.columns) + 1 == len(csvvolfiles.columns)

    # FULLPATH should always contain absolute paths
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # The metadata in the rediscovered files should have been removed
    assert len(reekensemble.files[reekensemble.files["GRID"] == "simgrid"]) == 0

    # CSV files
    csvpath = "share/results/volumes/simulator_volume_fipnum.csv"
    vol_df = reekensemble.load_csv(csvpath)

    # Check that we have not tainted the realization dataframes:
    assert "REAL" not in reekensemble._realizations[0].get_df(csvpath)

    assert "REAL" in vol_df
    assert len(vol_df["REAL"].unique()) == 3  # missing in 2 reals
    vol_df.to_csv(os.path.join(tmp, "simulatorvolumes.csv"), index=False)

    # Test retrieval of cached data
    vol_df2 = reekensemble.get_df(csvpath)

    assert "REAL" in vol_df2
    assert len(vol_df2["REAL"].unique()) == 3  # missing in 2 reals

    # Realization deletion:
    reekensemble.remove_realizations([1, 3])
    assert len(reekensemble) == 3

    # Readd the same realizations
    reekensemble.add_realizations(
        [
            reekdir + "realization-1/iter-0",
            reekdir + "realization-3/iter-0",
        ]
    )
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24

    # File discovery must be repeated for the newly added realizations
    reekensemble.find_files(
        "share/results/volumes/" + "simulator_volume_fipnum.csv",
        metadata={"GRID": "simgrid"},
    )
    assert len(reekensemble.files) == 25
    # Test addition of already added realization:
    reekensemble.add_realizations(reekdir + "realization-1/iter-0")
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24  # discovered files are lost!

    keycount = len(reekensemble.keys())
    reekensemble.remove_data("parameters.txt")
    assert len(reekensemble.keys()) == keycount - 1