def test_guess_ntequil():
    """Check that the EQLDIMS item 0 guess matches the number of EQUIL records."""
    # Each case pairs an EQUIL string with the record count it should yield.
    expected_by_deck = (
        ("EQUIL\n200 2000/\n2000 2000/\n", 2),
        ("EQUIL\n200 2000/\n", 1),
        ("EQUIL\n200 2000 333/\n0 0/\n1 1/\n", 3),
    )
    for deckstr, expected_count in expected_by_deck:
        assert inferdims.guess_dim(deckstr, "EQLDIMS", 0) == expected_count
def test_sgof_satnuminferrer(tmpdir, mocker):
    """Check that the SATNUM count is inferred for an SGOF string without TABDIMS.

    The count is verified three ways: directly through inferdims.guess_dim,
    through the dataframe produced by satfunc.df (including a round trip via
    satfunc.df2ecl), and through the ecl2csv command line entry point.
    """
    sgofstr = """
SGOF
  0 0 1 1
  1 1 0 0
  /
  0 0 1 1
  0.5 0.5 0.5 0.5
  1 1 0 0
/
  0 0 1 0
  0.1 0.1 0.1 0.1
  1 1 0 0
/
"""
    # Work inside the temporary directory so the files written below
    # do not end up in the source tree.
    tmpdir.chdir()

    assert inferdims.guess_dim(sgofstr, "TABDIMS", 0) == 3

    sgofdf = satfunc.df(sgofstr)
    assert "SATNUM" in sgofdf
    assert len(sgofdf["SATNUM"].unique()) == 3
    assert len(sgofdf) == 8

    # The string produced by df2ecl must parse back to an identical dataframe:
    roundtripped_df = satfunc.df(satfunc.df2ecl(sgofdf))
    pd.testing.assert_frame_equal(sgofdf, roundtripped_df)

    # Write to file and try to parse it with command line:
    sgoffile = "__sgof_tmp.txt"
    csvfile = sgoffile + ".csv"
    Path(sgoffile).write_text(sgofstr)
    mocker.patch(
        "sys.argv", ["ecl2csv", "satfunc", "-v", sgoffile, "-o", csvfile]
    )
    ecl2csv.main()

    parsed_sgof = pd.read_csv(csvfile)
    assert len(parsed_sgof["SATNUM"].unique()) == 3
def test_guess_satnumcount():
    """Check the TABDIMS item 0 guess for a set of terse saturation tables."""
    # We always require a newline after a "/" in the Eclipse syntax
    # (anything between a / and \n is ignored)
    expected_by_deck = [
        ("SWOF\n0/\n0/\n", 2),
        ("SWOF\n0/\n0/ \n0/\n", 3),
        ("SWFN\n0/\n\n0/\n", 2),
        ("SGOF\n0/\n", 1),
        ("SGOF\n0/\n0/\n", 2),
        ("SGOF\n0/\n0/\n0/\n", 3),
        ("SGOF\n0 0 0 0/\n0 0 0 0/\n0 0 0 0/\n", 3),
        ("SGOF\n0 0 0 0 1 1 1 1/\n0 0 0 0 1 1 1 1/\n0 0 0 0 1 1 1/\n", 3),
    ]
    for deckstr, expected_count in expected_by_deck:
        assert inferdims.guess_dim(deckstr, "TABDIMS", 0) == expected_count
def test_guess_satnumcount():
    """Test that we are able to guess the SATNUM count in difficult cases"""

    def guessed_ntsfun(deckstr):
        """Return the guess for TABDIMS item 0 for the given deck string."""
        return inferdims.guess_dim(deckstr, "TABDIMS", 0)

    # We always require a newline after a "/" in the Eclipse syntax
    # (anything between a / and \n is ignored)
    assert guessed_ntsfun("SWOF\n0/\n0/\n") == 2
    assert guessed_ntsfun("SWOF\n0/\n0/ \n0/\n") == 3
    assert guessed_ntsfun("SWFN\n0/\n\n0/\n") == 2

    # One, two and three minimal SGOF records:
    assert guessed_ntsfun("SGOF\n0/\n") == 1
    assert guessed_ntsfun("SGOF\n0/\n0/\n") == 2
    assert guessed_ntsfun("SGOF\n0/\n0/\n0/\n") == 3

    # Records carrying several values each:
    assert guessed_ntsfun("SGOF\n0 0 0 0/\n0 0 0 0/\n0 0 0 0/\n") == 3
    assert (
        guessed_ntsfun(
            "SGOF\n0 0 0 0 1 1 1 1/\n0 0 0 0 1 1 1 1/\n0 0 0 0 1 1 1/\n"
        )
        == 3
    )
def test_guess_dim():
    """Check that guess_dim raises ValueError for unsupported requests."""
    # (deck string, dims keyword, item index, expected error message)
    failing_calls = [
        ("SWOF\n0/\n0/\n", "WELLDIMS", 0, "Only supports TABDIMS and EQLDIMS"),
        ("SWOF\n0/\n0/\n", "TABDIMS", 2, "Only support item 0 and 1 in TABDIMS"),
        (
            "EQUIL\n0/\n0/\n",
            "EQLDIMS",
            1,
            "Only item 0 in EQLDIMS can be estimated",
        ),
    ]
    for deckstr, dimkeyword, itemidx, message in failing_calls:
        with pytest.raises(ValueError, match=message):
            inferdims.guess_dim(deckstr, dimkeyword, itemidx)
def test_sgof_satnuminferrer(tmpdir):
    """Test inferring of SATNUMS in SGOF strings

    Covers three ways in: inferdims.guess_dim on the raw string,
    satfunc.deck2df on the raw string, and the ecl2csv command line entry
    point. It also demonstrates that handing deck2df an already parsed deck
    (without TABDIMS) only yields the first SATNUM.
    """
    sgofstr = """
SGOF
  0 0 1 1
  1 1 0 0
  /
  0 0 1 1
  0.5 0.5 0.5 0.5
  1 1 0 0
/
  0 0 1 0
  0.1 0.1 0.1 0.1
  1 1 0 0
/
"""
    tmpdir.chdir()
    assert inferdims.guess_dim(sgofstr, "TABDIMS", 0) == 3
    sgofdf = satfunc.deck2df(sgofstr)
    assert "SATNUM" in sgofdf
    assert len(sgofdf["SATNUM"].unique()) == 3
    assert len(sgofdf) == 8

    # This illustrates how we cannot do it, CRITICAL
    # logging errors will be displayed:
    sgofdf = satfunc.deck2df(EclFiles.str2deck(sgofstr))
    assert len(sgofdf["SATNUM"].unique()) == 1

    # Write to file and try to parse it with command line:
    sgoffile = "__sgof_tmp.txt"
    with open(sgoffile, "w") as sgof_f:
        sgof_f.write(sgofstr)

    # sys.argv is process-global; restore it afterwards so that the fake
    # command line does not leak into tests that run after this one.
    original_argv = sys.argv
    sys.argv = ["ecl2csv", "satfunc", sgoffile, "-o", sgoffile + ".csv"]
    try:
        ecl2csv.main()
    finally:
        sys.argv = original_argv

    parsed_sgof = pd.read_csv(sgoffile + ".csv")
    assert len(parsed_sgof["SATNUM"].unique()) == 3
def deck2df(deck, satnumcount=None):
    """Extract the data in the saturation function keywords as a Pandas
    DataFrame.

    Data for all saturation functions are merged into one dataframe.
    The two first columns in the dataframe are 'KEYWORD' (which can be
    SWOF, SGOF, etc.), and then SATNUM which is an index counter from 1 and
    onwards. Then follows the data for each individual keyword that
    is found in the deck.

    SATNUM data can only be parsed correctly if TABDIMS is present
    and stating how many saturation functions there should be.
    If you have a string with TABDIMS missing, you must supply
    this as a string to this function, and not a parsed deck, as
    the default parser in EclFiles is very permissive (and only
    returning the first function by default).

    Arguments:
        deck (sunbeam.deck or str): Incoming data deck. Always
            supply as a string if you don't know TABDIMS-NTSFUN.
        satnumcount (int): Number of SATNUMs defined in the deck, only
            needed if TABDIMS with NTSFUN is not found in the deck.
            If not supplied (or None) and NTSFUN is not defined,
            it will be attempted inferred.

    Return:
        pd.DataFrame, columns 'SW', 'KRW', 'KROW', 'PC', ..
        An empty dataframe is returned if no saturation data is found, or
        if a record length does not match the column count of its keyword.
    """
    if "TABDIMS" not in deck:
        if not isinstance(deck, str):
            logging.critical(
                "Will not be able to guess NTSFUN from a parsed deck without TABDIMS."
            )
            logging.critical(
                "Only data for first SATNUM will be returned. "
                "Instead, supply string to deck2df()"
            )
            satnumcount = 1
        # If TABDIMS is in the deck, NTSFUN always has a value. It will
        # be set to 1 if defaulted.
        if not satnumcount:
            logging.warning(
                "TABDIMS+NTSFUN or satnumcount not supplied. Will be guessed."
            )
            # deck is a string here; parsed decks got satnumcount = 1 above.
            satnumcount = inferdims.guess_dim(deck, "TABDIMS", 0)
        augmented_strdeck = inferdims.inject_dimcount(
            str(deck), "TABDIMS", 0, satnumcount
        )
        # Re-parse the modified deck:
        deck = EclFiles.str2deck(augmented_strdeck)

    # A string that already contains TABDIMS has not been parsed yet; the
    # record lookup below needs a parsed deck, not a str.
    if isinstance(deck, str):
        deck = EclFiles.str2deck(deck)

    frames = []
    for keyword in KEYWORD_COLUMNS:
        if keyword not in deck:
            continue
        columns = KEYWORD_COLUMNS[keyword]
        column_count = len(columns)
        # Records are numbered consecutively from 1; that number is the SATNUM.
        for satnum, deckrecord in enumerate(deck[keyword], start=1):
            # All data for an entire SATNUM is returned in one list
            data = np.array(deckrecord[0])
            # Split up into the correct number of columns
            if len(data) % column_count:
                logging.error("Inconsistent data length or bug")
                return pd.DataFrame()
            satpoints = len(data) // column_count
            dframe = pd.DataFrame(
                columns=columns, data=data.reshape(satpoints, column_count)
            )
            dframe["SATNUM"] = satnum
            dframe["KEYWORD"] = keyword
            # Put the two identifying columns first.
            dframe = dframe[["KEYWORD", "SATNUM"] + columns]
            frames.append(dframe)

    nonempty_frames = [frame for frame in frames if not frame.empty]
    if nonempty_frames:
        return pd.concat(nonempty_frames, axis=0, sort=False)
    logging.warning("No saturation data found in deck")
    return pd.DataFrame()
def deck2df(deck, ntequl=None):
    """Extract the data in the EQUIL keyword as a Pandas DataFrame.

    How each data value in the EQUIL records are to be interpreted
    depends on the phase configuration in the deck, which means
    that we need more than the EQUIL section alone to determine the
    dataframe.

    If ntequl is not supplied and EQLDIMS is not in the deck, the
    equil data is not well defined in terms of sunbeam. This means
    that we have to infer the correct number of EQUIL lines from what
    gives us successful parsing from sunbeam. In those cases, the
    deck must be supplied as a string, if not, extra EQUIL lines
    are possibly already removed by the sunbeam parser in
    eclfiles.str2deck().

    Arguments:
        deck (sunbeam.deck or str): Eclipse deck or string with deck. If
            not string, EQLDIMS must be present in the deck.
        ntequl (int): If not None, should state the NTEQUL in EQLDIMS. If
            None and EQLDIMS is not present, it will be inferred.

    Return:
        pd.DataFrame, empty if there is no EQUIL keyword in the deck.

    Raises:
        ValueError: If none of OIL, GAS and WATER are present in the deck.
    """
    if "EQLDIMS" not in deck:
        if not isinstance(deck, str):
            logging.critical(
                "Will not be able to guess NTEQUL from a parsed deck without EQLDIMS."
            )
            logging.critical(
                "Only data for the first EQUIL will be returned. "
                "Instead, supply string to deck2df()"
            )
            ntequl = 1
        if not ntequl:
            logging.warning("EQLDIMS+NTEQUL or ntequl not supplied. Will be guessed")
            # deck is a string here; parsed decks got ntequl = 1 above.
            ntequl = inferdims.guess_dim(deck, "EQLDIMS", 0)
        # NOTE(review): str(deck) mirrors the saturation-function deck2df;
        # inject_dimcount presumably operates on text - confirm.
        augmented_strdeck = inferdims.inject_dimcount(
            str(deck), "EQLDIMS", 0, ntequl
        )
        deck = EclFiles.str2deck(augmented_strdeck)

    # A string that already contains EQLDIMS has not been parsed yet.
    if isinstance(deck, str):
        deck = EclFiles.str2deck(deck)

    has_oil = "OIL" in deck
    has_gas = "GAS" in deck
    has_water = "WATER" in deck
    phasecount = sum([has_oil, has_gas, has_water])

    # Stays None when no phase keyword is present, so that the check below
    # raises ValueError instead of failing on an unbound name.
    columnnames = None
    if has_oil and has_gas and has_water:
        # oil-water-gas
        columnnames = [
            "DATUM",
            "PRESSURE",
            "OWC",
            "PCOWC",
            "GOC",
            "PCGOC",
            "INITRS",
            "INITRV",
            "ACCURACY",
        ]
    elif has_gas and has_water:
        # gas-water
        columnnames = [
            "DATUM",
            "PRESSURE",
            "GWC",
            "PCGWC",
            "IGNORE1",
            "IGNORE2",
            "IGNORE3",
            "IGNORE4",
            "ACCURACY",
        ]
    elif has_oil and has_water:
        # oil-water
        columnnames = [
            "DATUM",
            "PRESSURE",
            "OWC",
            "PCOWC",
            "IGNORE1",
            "IGNORE2",
            "IGNORE3",
            "IGNORE4",
            "ACCURACY",
        ]
    elif has_oil and has_gas:
        # oil-gas
        columnnames = [
            "DATUM",
            "PRESSURE",
            "IGNORE1",
            "IGNORE2",
            "GOC",
            "PCGOC",
            "IGNORE3",
            "IGNORE4",
            "ACCURACY",
        ]
    elif phasecount == 1:
        columnnames = ["DATUM", "PRESSURE"]

    if not columnnames:
        raise ValueError("Unsupported phase configuration")

    if "EQUIL" not in deck:
        # Return an empty dataframe instance (not the DataFrame class).
        return pd.DataFrame()

    records = []
    for rec in deck["EQUIL"]:
        rowlist = [x[0] for x in rec]
        if len(rowlist) > len(columnnames):
            # More values than known columns; keep only what can be named.
            rowlist = rowlist[: len(columnnames)]
            logging.warning(
                "Something wrong with columnnames or EQUIL-data, data is chopped!"
            )
        records.append(rowlist)

    dataframe = pd.DataFrame(columns=columnnames, data=records)

    # The IGNORE* columns are placeholders for items that carry no meaning
    # in the current phase configuration; drop them from the result.
    ignored_columns = [col for col in dataframe.columns if "IGNORE" in col]
    return dataframe.drop(columns=ignored_columns)