def test_reek001_scalars():
    """Check loading of scalar-valued files into an ensemble.

    A scalar file may hold a numeric value, a string, or be empty.
    """
    # When the code is pasted into an interactive session there is no
    # __file__, so fall back to the current working directory.
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    realization_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )
    ens = ScratchEnsemble("reektest", realization_glob)

    assert "OK" in ens.keys()
    assert isinstance(ens.get_df("OK"), pd.DataFrame)
    assert len(ens.get_df("OK")) == 5

    # One of the npv.txt files contains the string "error!"
    ens.load_scalar("npv.txt")
    npv = ens.get_df("npv.txt")
    assert isinstance(npv, pd.DataFrame)
    assert "REAL" in npv
    # The filename doubles as the column name
    assert "npv.txt" in npv
    print(npv)
    assert len(npv) == 5
    assert npv.dtypes["REAL"] == int
    # An object column is undesirable; it can cause trouble with aggregation
    assert npv.dtypes["npv.txt"] == object

    # Second attempt, this time forcing numeric conversion:
    ens.load_scalar("npv.txt", force_reread=True, convert_numeric=True)
    npv = ens.get_df("npv.txt")
    npv_dtype = npv.dtypes["npv.txt"]
    assert npv_dtype == int or npv_dtype == float
    # The realization holding "error!" should now be gone
    assert len(npv) == 4

    # emptyscalarfile is missing in real-4
    ens.load_scalar("emptyscalarfile")
    assert len(ens.get_df("emptyscalarfile")) == 4
    assert "emptyscalarfile" in ens.keys()
    # Use when filter is merged.
    # assert len(ens.filter('emptyscalarfile', inplace=True)) == 4

    # Reading the empty files as numerical values should give nothing back:
    with pytest.raises(ValueError):
        ens.load_scalar(
            "emptyscalarfile", force_reread=True, convert_numeric=True
        )

    with pytest.raises(ValueError):
        ens.load_scalar("nonexistingfile")
def test_noautodiscovery():
    """Verify that auto-discovery of UNSMRY files can be switched off."""
    # When the code is pasted into an interactive session there is no
    # __file__, so fall back to the current working directory.
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    realization_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )

    # A default-constructed ensemble auto-discovers; confirm that first.
    ens = ScratchEnsemble("reektest", realization_glob)
    assert not ens.get_smry(column_keys="FOPT").empty
    assert "UNSMRY" in ens.files["FILETYPE"].values

    # Rebuild the ensemble with auto-discovery turned off
    ens = ScratchEnsemble("reektest", realization_glob, autodiscovery=False)
    assert ens.get_smry(column_keys="FOPT").empty

    # Summary data shows up once the files are located explicitly
    ens.find_files("eclipse/model/*UNSMRY")
    assert not ens.get_smry(column_keys="FOPT").empty

    # Some very basic data is picked up even with autodiscovery=False
    assert "parameters.txt" in ens.keys()
    assert "STATUS" in ens.keys()

    # Unwanted data can be deleted explicitly, by name or by list of names:
    ens.remove_data("parameters.txt")
    ens.remove_data(["STATUS"])
    assert "parameters.txt" not in ens.keys()
    assert "STATUS" not in ens.keys()
def test_ensemble_ecl():
    """Test Eclipse-specific functionality of ScratchEnsemble.

    Exercises summary key lookup, loading of summary data at different
    time indices, date list generation, well and group name lookup,
    the difference of two ensembles, and summary statistics including
    user-supplied quantiles.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0",
    )

    # Eclipse summary keys:
    assert len(reekensemble.get_smrykeys("FOPT")) == 1
    assert len(reekensemble.get_smrykeys("F*")) == 49
    assert len(reekensemble.get_smrykeys(["F*", "W*"])) == 49 + 280
    assert not reekensemble.get_smrykeys("BOGUS")

    # reading ensemble dataframe
    # First load without a column filter; only the filtered result
    # from the second call is inspected.
    reekensemble.load_smry(time_index="monthly")

    monthly = reekensemble.load_smry(column_keys=["F*"], time_index="monthly")
    assert monthly.columns[0] == "REAL"  # Enforce order of columns.
    assert monthly.columns[1] == "DATE"
    assert len(monthly) == 190

    # Check that the result was cached in memory, not necessarily on disk..
    assert isinstance(reekensemble.get_df("unsmry--monthly.csv"), pd.DataFrame)

    assert len(reekensemble.keys()) == 4

    # When asking the ensemble for FOPR, we also get REAL as a column
    # in return. Note that the internal stored version will be
    # overwritten by each load_smry()
    assert len(reekensemble.load_smry(column_keys=["FOPR"]).columns) == 3
    assert len(reekensemble.load_smry(column_keys=["FOP*"]).columns) == 11
    assert (
        len(reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).columns) == 12
    )

    # Check that there is now a cached version with raw dates:
    assert isinstance(reekensemble.get_df("unsmry--raw.csv"), pd.DataFrame)
    # The columns are not similar, this is allowed!

    # If you get 3205 here, it means that you are using the union of
    # raw dates from all realizations, which is not correct
    assert (
        len(reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).index) == 1700
    )

    # Date list handling:
    assert len(reekensemble.get_smry_dates(freq="report")) == 641
    assert len(reekensemble.get_smry_dates(freq="raw")) == 641
    assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
    assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
    assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
    assert len(reekensemble.get_smry_dates(freq="last")) == 1
    assert reekensemble.get_smry_dates(
        freq="last"
    ) == reekensemble.get_smry_dates(freq="last", end_date="2050-02-01")

    assert (
        str(reekensemble.get_smry_dates(freq="report")[-1])
        == "2003-01-02 00:00:00"
    )
    assert (
        str(reekensemble.get_smry_dates(freq="raw")[-1])
        == "2003-01-02 00:00:00"
    )
    assert str(reekensemble.get_smry_dates(freq="yearly")[-1]) == "2004-01-01"
    assert str(reekensemble.get_smry_dates(freq="monthly")[-1]) == "2003-02-01"
    assert str(reekensemble.get_smry_dates(freq="daily")[-1]) == "2003-01-02"
    assert str(reekensemble.get_smry_dates(freq="last")[-1]) == "2003-01-02"

    assert (
        str(
            reekensemble.get_smry_dates(freq="daily", end_date="2002-03-03")[-1]
        )
        == "2002-03-03"
    )
    assert (
        str(
            reekensemble.get_smry_dates(freq="daily", start_date="2002-03-03")[
                0
            ]
        )
        == "2002-03-03"
    )

    # Start and end outside of orig data and on the "wrong side"
    dates = reekensemble.get_smry_dates(end_date="1999-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "1999-03-03"
    dates = reekensemble.get_smry_dates(start_date="2099-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "2099-03-03"

    # Time interpolated dataframes with summary data:
    yearly = reekensemble.get_smry_dates(freq="yearly")
    assert (
        len(reekensemble.load_smry(column_keys=["FOPT"], time_index=yearly))
        == 25
    )
    # NB: This is cached in unsmry-custom.csv, not unsmry--yearly!
    # This usage is discouraged. Use 'yearly' in such cases.

    # Check that we can shortcut get_smry_dates:
    assert (
        len(reekensemble.load_smry(column_keys=["FOPT"], time_index="yearly"))
        == 25
    )

    assert (
        len(reekensemble.load_smry(column_keys=["FOPR"], time_index="last"))
        == 5
    )
    assert isinstance(reekensemble.get_df("unsmry--last.csv"), pd.DataFrame)

    # Eclipse well names list
    assert len(reekensemble.get_wellnames("OP*")) == 5
    assert len(reekensemble.get_wellnames(None)) == 8
    assert len(reekensemble.get_wellnames()) == 8
    assert not reekensemble.get_wellnames("")
    assert len(reekensemble.get_wellnames(["OP*", "WI*"])) == 8

    # eclipse well groups list
    assert len(reekensemble.get_groupnames()) == 3

    # delta between two ensembles
    diff = reekensemble - reekensemble
    assert (
        len(diff.get_smry(column_keys=["FOPR", "FGPR", "FWCT"]).columns) == 5
    )

    # eclipse summary vector statistics for a given ensemble
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOPR", "FGPR"], time_index="monthly"
    )
    assert isinstance(df_stats, pd.DataFrame)
    assert len(df_stats.columns) == 2
    assert isinstance(df_stats["FOPR"]["mean"], pd.Series)
    assert len(df_stats["FOPR"]["mean"].index) == 38

    # check if wild cards also work for get_smry_stats
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOP*", "FGP*"], time_index="monthly"
    )
    assert len(df_stats.columns) == len(
        reekensemble.get_smrykeys(["FOP*", "FGP*"])
    )

    # Check webviz requirements for dataframe
    stats = df_stats.index.levels[0]
    assert "minimum" in stats
    assert "maximum" in stats
    assert "p10" in stats
    assert "p90" in stats
    assert "mean" in stats
    assert (
        df_stats["FOPR"]["minimum"].iloc[-2]
        < df_stats["FOPR"]["maximum"].iloc[-2]
    )

    # Check user supplied quantiles
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOPT"], time_index="yearly", quantiles=[0, 15, 50, 85, 100]
    )
    statistics = df_stats.index.levels[0]
    assert "p0" in statistics
    assert "p15" in statistics
    assert "p50" in statistics
    assert "p85" in statistics
    assert "p100" in statistics

    # For oil industry, p15 on FOPT should yield a larger value than p85.
    # But the quantiles we get out follows the rest of the world
    # so we check for the opposite.
    # Use .iloc for the last element: plain [-1] on a Series relies on the
    # deprecated fallback from label lookup to positional lookup.
    assert df_stats["FOPT"]["p85"].iloc[-1] > df_stats["FOPT"]["p15"].iloc[-1]

    with pytest.raises(ValueError):
        reekensemble.get_smry_stats(
            column_keys=["FOPT"], time_index="yearly", quantiles=["foobar"]
        )

    noquantiles = reekensemble.get_smry_stats(
        column_keys=["FOPT"], time_index="yearly", quantiles=[]
    )
    assert len(noquantiles.index.levels[0]) == 3
def test_reek001(tmp="TMP"):
    """Test import of a stripped 5 realization ensemble.

    Covers the file registry, STATUS and parameters.txt dataframes,
    txt and CSV loading, file discovery with and without metadata,
    and removal and re-addition of realizations.

    Args:
        tmp: Directory that receives CSV dumps of selected dataframes.
            It is created if it does not exist.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0",
    )
    assert isinstance(reekensemble, ScratchEnsemble)
    assert reekensemble.name == "reektest"
    assert len(reekensemble) == 5

    assert isinstance(reekensemble[0], ScratchRealization)

    assert (
        len(reekensemble.files[reekensemble.files.LOCALPATH == "jobs.json"])
        == 5
    )
    assert (
        len(
            reekensemble.files[reekensemble.files.LOCALPATH == "parameters.txt"]
        )
        == 5
    )
    assert (
        len(reekensemble.files[reekensemble.files.LOCALPATH == "STATUS"]) == 5
    )

    statusdf = reekensemble.get_df("STATUS")
    assert len(statusdf) == 250  # 5 realizations, 50 jobs in each
    assert "REAL" in statusdf.columns
    assert "FORWARD_MODEL" in statusdf.columns
    statusdf = statusdf.set_index(["REAL", "FORWARD_MODEL"]).sort_index()
    assert "DURATION" in statusdf.columns  # calculated
    assert "argList" in statusdf.columns  # from jobs.json

    # Sample check the duration for RMS in realization 4:
    assert int(statusdf.loc[4, "RMS_BATCH"]["DURATION"].values[0]) == 195

    # STATUS in real4 is modified to simulate that Eclipse never finished:
    assert numpy.isnan(
        statusdf.loc[4, "ECLIPSE100_2014.2"]["DURATION"].values[0]
    )

    if not os.path.exists(tmp):
        os.mkdir(tmp)
    statusdf.to_csv(os.path.join(tmp, "status.csv"), index=False)

    # Parameters.txt
    paramsdf = reekensemble.load_txt("parameters.txt")
    assert len(paramsdf) == 5  # 5 realizations
    paramsdf = reekensemble.parameters  # also test as property
    paramsdf = reekensemble.get_df("parameters.txt")
    assert len(paramsdf) == 5
    assert len(paramsdf.columns) == 26  # 25 parameters, + REAL column
    paramsdf.to_csv(os.path.join(tmp, "params.csv"), index=False)

    # Check that the ensemble object has not tainted the realization dataframe:
    assert "REAL" not in reekensemble._realizations[0].get_df("parameters.txt")

    # The column FOO in parameters is only present in some, and
    # is present with NaN in real0:
    assert "FOO" in reekensemble.parameters.columns
    assert len(reekensemble.parameters["FOO"].dropna()) == 1
    # (NaN in one real, and non-existing in the others is the same thing)

    # Test loading of another txt file:
    reekensemble.load_txt("outputs.txt")
    assert "NPV" in reekensemble.load_txt("outputs.txt").columns
    # Check implicit discovery
    assert "outputs.txt" in reekensemble.files["LOCALPATH"].values
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # File discovery:
    csvvolfiles = reekensemble.find_files(
        "share/results/volumes/*csv", metadata={"GRID": "simgrid"}
    )
    assert isinstance(csvvolfiles, pd.DataFrame)
    assert "REAL" in csvvolfiles
    assert "FULLPATH" in csvvolfiles
    assert "LOCALPATH" in csvvolfiles
    assert "BASENAME" in csvvolfiles
    # Check the explicit metadata:
    assert "GRID" in csvvolfiles
    # Compare as a list: asserting on "array == list" depends on the truth
    # value of a numpy array, which raises ValueError instead of failing
    # cleanly when there is more than one unique value.
    assert list(csvvolfiles["GRID"].unique()) == ["simgrid"]

    reekensemble.files.to_csv(os.path.join(tmp, "files.csv"), index=False)

    # Check that rediscovery does not mess things up:
    filecount = len(reekensemble.files)
    newfiles = reekensemble.find_files("share/results/volumes/*csv")
    # Also note that we skipped metadata here in rediscovery:

    assert len(reekensemble.files) == filecount
    assert len(newfiles) == len(csvvolfiles)

    # The last invocation of find_files() should not return the metadata
    assert len(newfiles.columns) + 1 == len(csvvolfiles.columns)

    # FULLPATH should always contain absolute paths
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # The metadata in the rediscovered files should have been removed
    assert (
        len(reekensemble.files[reekensemble.files["GRID"] == "simgrid"]) == 0
    )

    # CSV files
    csvpath = "share/results/volumes/simulator_volume_fipnum.csv"
    vol_df = reekensemble.load_csv(csvpath)

    # Check that we have not tainted the realization dataframes:
    assert "REAL" not in reekensemble._realizations[0].get_df(csvpath)

    assert "REAL" in vol_df
    assert len(vol_df["REAL"].unique()) == 3  # missing in 2 reals
    vol_df.to_csv(os.path.join(tmp, "simulatorvolumes.csv"), index=False)

    # Test retrieval of cached data
    vol_df2 = reekensemble.get_df(csvpath)

    assert "REAL" in vol_df2
    assert len(vol_df2["REAL"].unique()) == 3  # missing in 2 reals

    # Realization deletion:
    reekensemble.remove_realizations([1, 3])
    assert len(reekensemble) == 3

    # Readd the same realizations
    reekensemble.add_realizations(
        [
            testdir + "/data/testensemble-reek001/" + "realization-1/iter-0",
            testdir + "/data/testensemble-reek001/" + "realization-3/iter-0",
        ]
    )
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24

    # File discovery must be repeated for the newly added realizations
    reekensemble.find_files(
        "share/results/volumes/" + "simulator_volume_fipnum.csv",
        metadata={"GRID": "simgrid"},
    )
    assert len(reekensemble.files) == 25
    # Test addition of already added realization:
    reekensemble.add_realizations(
        testdir + "/data/testensemble-reek001/" + "realization-1/iter-0"
    )
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24  # discovered files are lost!

    keycount = len(reekensemble.keys())
    reekensemble.remove_data("parameters.txt")
    assert len(reekensemble.keys()) == keycount - 1