def test_ens_premature_ecl(tmpdir):
    """Verify ensemble behaviour when Eclipse stopped early in realization 1

    A copy of the reek test ensemble is made in the temporary directory,
    where the UNSMRY file of realization-1 is replaced by its "-failed2000"
    variant. Raw summary data, filtering, interpolated summary data,
    statistics and delta ensembles are then compared against the untouched
    ensemble.
    """

    def final_date_count(frame):
        """Count distinct per-realization maximum DATE values in a frame"""
        return len(frame.groupby("REAL").max()["DATE"].unique())

    def peak_fopt_real1(ensemble, frequency):
        """Return the maximal FOPT of realization 1 for a given time_index"""
        smry = ensemble.get_smry(column_keys="FOPT", time_index=frequency)
        return smry.groupby("REAL").max()["FOPT"].loc[1]

    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    source_root = testdir + "/data/testensemble-reek001"

    origensemble = ScratchEnsemble("origreek", source_root + "/realization-*/iter-0")
    raw_orig_smry = origensemble.load_smry()

    # Work on a copy inside the temporary directory, so that the UNSMRY
    # file of realization-1 can be swapped without touching the test data.
    tmpdir.chdir()
    shutil.copytree(source_root, "ens_fail_real_reek001")
    unsmry_filename = (
        "ens_fail_real_reek001/realization-1/"
        "iter-0/eclipse/model/2_R001_REEK-1.UNSMRY"
    )
    shutil.copy(unsmry_filename + "-failed2000", unsmry_filename)

    failensemble = ScratchEnsemble(
        "failedreek", "ens_fail_real_reek001/realization-*/iter-0"
    )
    raw_fail_smry = failensemble.load_smry()

    # Usually superfluous when raw datetimes are obtained.
    for raw_frame in (raw_orig_smry, raw_fail_smry):
        raw_frame["DATE"] = pd.to_datetime(raw_frame["DATE"])

    # The original ensemble has one common final date ...
    assert final_date_count(raw_orig_smry) == 1
    # ... while the modified ensemble has two different ones:
    assert final_date_count(raw_fail_smry) == 2

    # The END statement in the schedule file on 2000-08-01 yields this:
    fail_final_dates = raw_fail_smry.groupby("REAL").max()["DATE"]
    assert str(fail_final_dates.loc[1]) == "2000-08-01 00:00:00"

    # Keep only the realizations that made it to the end. In normal
    # scenarios this would be accomplished by .filter('OK'), but not in
    # this test context.
    max_date = str(failensemble.get_smry()["DATE"].max())
    filtered_fail_ensemble = failensemble.filter(
        "unsmry--raw", column="DATE", columncontains=max_date, inplace=False
    )
    assert len(filtered_fail_ensemble) == 4
    assert final_date_count(filtered_fail_ensemble.get_smry()) == 1

    # Check also get_smry() on the unfiltered ensemble:
    assert final_date_count(failensemble.get_smry()) == 2

    # When time_index is set, every realization is interpolated onto the
    # same date range
    assert final_date_count(failensemble.get_smry(time_index="monthly")) == 1

    # This is in fact *different* from what load_smry gives (issue #97)
    # (this behaviour might change, get_smry() is allowed in the future
    # to mimic load_smry())
    assert final_date_count(failensemble.load_smry(time_index="monthly")) == 2

    # FOPT is very much lower in real 1 of the failed ensemble, both for
    # monthly and yearly data:
    for frequency in ("monthly", "yearly"):
        assert peak_fopt_real1(failensemble, frequency) < 1500000
        assert peak_fopt_real1(origensemble, frequency) > 6000000

    fail_foprs = failensemble.get_smry(column_keys="FOPR", time_index="monthly")
    after_stop = fail_foprs["DATE"] > datetime.date(2000, 8, 1)

    # The FOPR rate vector should be all zero after the stop in real 1,
    # but not in real 0
    assert fail_foprs[(fail_foprs["REAL"] == 1) & after_stop]["FOPR"].abs().sum() == 0
    assert fail_foprs[(fail_foprs["REAL"] == 0) & after_stop]["FOPR"].abs().sum() > 0

    # These statistics treat the "failed" realization as correct, which
    # affects the numbers:
    fail_stats = failensemble.get_smry_stats(time_index="monthly")
    # Here, real 1 is removed
    filtered_stats = filtered_fail_ensemble.get_smry_stats(time_index="monthly")
    # Statistics for the original ensemble
    orig_stats = origensemble.get_smry_stats(time_index="monthly")

    # The 30 last rows are the rows from 2000-09-01 to 2003-02-01:
    fail_minimum = fail_stats.loc["minimum"]
    assert fail_minimum["FOPR"].iloc[-30:].abs().sum() == 0
    assert fail_minimum["FOPT"].iloc[-30:].unique()[0] == 1431247.125

    # In the filtered stats, the last date 2003-02-01 is not included,
    # probably a minor bug. Because of that, the last 29 rows are used
    # here instead of the last 30.
    # (this test should work even if that bug is fixed)
    filtered_minimum = filtered_stats.loc["minimum"]
    assert filtered_minimum["FOPR"].iloc[-29:].abs().sum() > 0
    assert len(filtered_minimum["FOPT"].iloc[-29:].unique()) == 29

    # Mean FOPR and FOPT should be affected by the zero-padded rates:
    for vector in ("FOPR", "FOPT"):
        fail_mean = fail_stats.loc["mean"].iloc[-10][vector]
        assert fail_mean < filtered_stats.loc["mean"].iloc[-10][vector]
        assert fail_mean < orig_stats.loc["mean"].iloc[-10][vector]

    # Delta profiles:
    delta_fail = origensemble - failensemble
    delta_fail_smry = delta_fail.get_smry()

    # Delta profiles are given for all realizations
    assert len(delta_fail_smry["REAL"].unique()) == 5
    # and they all end at the same ultimate date:
    assert final_date_count(delta_fail_smry) == 1

    # BUT, there are only NaNs for values after 2000-08-01 in real 1:
    real1_after_stop = delta_fail_smry[
        (delta_fail_smry["REAL"] == 1) & (delta_fail_smry["DATE"] > "2000-08-01")
    ]
    assert np.isnan(real1_after_stop["FOPT"].unique()[0])

    # Delta profiles after filtering:
    delta_filtered = origensemble - filtered_fail_ensemble
    assert len(origensemble) == 5
    assert len(filtered_fail_ensemble) == 4
    # NOTE: asserting len(delta_filtered) == 4 (only four realizations)
    # requires #83 to be resolved.

    # to_virtual() and time_index can be removed when #83 is finished.
    delta_filtered_smry = delta_filtered.to_virtual().get_smry(time_index="monthly")

    # Only four realizations should be present, as one has been filtered away
    assert len(delta_filtered_smry["REAL"].unique()) == 4
    # The ultimate date is the same in all four:
    assert final_date_count(delta_filtered_smry) == 1
def test_filter():
    """Exercise filtering of realizations in ensembles

    Realizations that do not fulfil the tested conditions are dropped
    from the ensemble.
    """

    def kept(ensemble, localpath, **criteria):
        """Count the realizations left after a non-inplace filter call"""
        return len(ensemble.filter(localpath, inplace=False, **criteria))

    # The fallback eases copying test code into interactive sessions
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    dirs = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", dirs)

    # This should only require a STATUS file to be present
    # in every realization
    assert len(reekensemble.filter("STATUS")) == 5

    # String equivalence on numeric data:
    reekensemble.filter(
        "parameters.txt", key="RMS_SEED", value="723121249", inplace=True
    )
    assert len(reekensemble) == 2
    # (False positive from pylint on the next line)
    assert reekensemble.agg("mean")["parameters"]["RMS_SEED"] == 723121249

    # Numeric equivalence, on a freshly loaded ensemble:
    reekensemble = ScratchEnsemble("reektest", dirs)
    reekensemble.filter(
        "parameters.txt", key="RMS_SEED", value=723121249, inplace=True
    )
    assert len(reekensemble) == 2
    assert reekensemble.agg("mean")["parameters"]["RMS_SEED"] == 723121249

    reekensemble = ScratchEnsemble("reektest", dirs)
    assert kept(reekensemble, "parameters.txt", key="FOO") == 2
    # (NaN in one of the parameters.txt is True in this context)
    assert kept(reekensemble, "parameters.txt", key="MULTFLT_F1", value=0.001) == 4

    # The value can be given as a number or as a string:
    for fwl in (1700, "1700"):
        assert kept(reekensemble, "parameters.txt", key="FWL", value=fwl) == 3

    # This one is tricky, the empty string should correspond to
    # missing data - NOT IMPLEMENTED
    # (i.e. filtering on key='FOO' with value='' is not tested here)
    # while no value means that the key must be present
    assert kept(reekensemble, "parameters.txt", key="FOO") == 2

    # 'key' and 'value' are not accepted for things that are tables.
    for table_misuse in ({"key": "ECLIPSE"}, {"value": "ECLIPSE"}):
        with pytest.raises(ValueError):
            reekensemble.filter("STATUS", **table_misuse)

    # Column presence
    assert len(reekensemble.filter("STATUS", column="FORWARD_MODEL")) == 5
    assert kept(reekensemble, "STATUS", column="FORWARD_MODEL") == 5
    assert not reekensemble.filter("STATUS", column="FOOBAR", inplace=False)
    with pytest.raises(ValueError):
        reekensemble.filter("STATUS", wrongarg="FOOBAR", inplace=False)

    # Column content in the STATUS table
    status_hits = reekensemble.filter(
        "STATUS", column="FORWARD_MODEL", columncontains="ECLIPSE100_2014.2"
    )
    assert len(status_hits) == 5
    assert not reekensemble.filter(
        "STATUS",
        column="FORWARD_MODEL",
        columncontains="ECLIPSE100_2010.2",
        inplace=False,
    )

    # Filtering on summary data that has been loaded into the ensemble
    reekensemble.load_smry()
    assert len(reekensemble.filter("unsmry--raw")) == 5
    assert len(reekensemble.filter("unsmry--raw", column="FOPT")) == 5
    assert not reekensemble.filter("unsmry--raw", column="FOOBAR", inplace=False)
    assert (
        len(reekensemble.filter("unsmry--raw", column="FOPT", columncontains=0)) == 5
    )
    assert not reekensemble.filter(
        "unsmry--raw", column="FOPT", columncontains=-1000, inplace=False
    )
    for fopt_value in (6025523.0, 6025523):
        assert (
            kept(
                reekensemble,
                "unsmry--raw",
                column="FOPT",
                columncontains=fopt_value,
            )
            == 1
        )

    # We do not support strings here (not yet), so
    # columncontains='6025523.0' on FOPT is not tested.

    # Date values given as strings, with and without a time of day:
    for stamp in ("2002-11-25", "2002-11-25 00:00:00"):
        assert (
            kept(reekensemble, "unsmry--raw", column="DATE", columncontains=stamp)
            == 5
        )
    assert not reekensemble.filter(
        "unsmry--raw",
        column="DATE",
        columncontains="2002-11-25 00:00:01",
        inplace=False,
    )
    assert (
        kept(
            reekensemble,
            "unsmry--raw",
            column="DATE",
            columncontains="2000-01-07 02:26:15",
        )
        == 3
    )
    assert not reekensemble.filter(
        "unsmry--raw", column="DATE", columncontains="2000-01-07", inplace=False
    )