def fmuobs(
    inputfile: str,
    ertobs: Optional[str] = None,
    yml: Optional[str] = None,
    resinsight: Optional[str] = None,
    csv: Optional[str] = None,
    verbose: bool = False,
    debug: bool = False,
    starttime: Optional[str] = None,
    includedir: Optional[str] = None,
):
    # pylint: disable=too-many-arguments
    """Alternative to main() with named arguments.

    Autodetects the observation file format of ``inputfile``, parses it into
    the internal dataframe representation, and dumps the result to any of the
    requested output formats.

    Args:
        inputfile: Path to an observation file in any supported format.
        ertobs: Output filename for ERT observation format (may be the
            stdout magic filename).
        yml: Output filename for YAML format.
        resinsight: Output filename for ResInsight CSV format.
        csv: Output filename for the internal dataframe CSV format.
        verbose: If True, set log level to INFO.
        debug: If True, set log level to DEBUG (overrides verbose).
        starttime: Reference date used to compute DATE from DAYS.
        includedir: Directory that ERT include statements are interpreted
            relative to. If not given, current directory and the input
            file's directory are tried in turn.
    """
    if verbose or debug:
        if __MAGIC_STDOUT__ in (csv, yml, ertobs):
            raise SystemExit("Don't use verbose/debug when writing to stdout")
        loglevel = logging.DEBUG if debug else logging.INFO
        logger.setLevel(loglevel)
        getLogger("subscript.fmuobs.parsers").setLevel(loglevel)
        getLogger("subscript.fmuobs.writers").setLevel(loglevel)
        getLogger("subscript.fmuobs.util").setLevel(loglevel)

    (filetype, dframe) = autoparse_file(inputfile)

    # For ERT files, there is the problem of include-file-path. If not-found
    # include filepaths are present, the filetype is ert, but dframe is empty.
    # BUGFIX: the original tested ``pd.DataFrame.empty`` — the *class*
    # property object, which is always truthy — instead of ``dframe.empty``.
    if filetype == "ert" and dframe.empty:
        with open(inputfile) as f_handle:
            input_str = f_handle.read()
        if not includedir:
            # Try and error for the location of include files, first in current
            # dir, then in the directory of the input file. The proper default
            # for cwd is the location of the ert config file, which is not
            # available in this parser, and must be supplied on command line.
            try:
                dframe = ertobs2df(input_str, cwd=".", starttime=starttime)
            except FileNotFoundError:
                dframe = ertobs2df(
                    input_str,
                    cwd=os.path.dirname(inputfile),
                    starttime=starttime,
                )
        else:
            dframe = ertobs2df(input_str, cwd=includedir)

    if starttime:
        dframe = compute_date_from_days(dframe)

    if not validate_internal_dframe(dframe):
        logger.error("Observation dataframe is invalid!")

    dump_results(dframe, csv, yml, resinsight, ertobs)
def test_ertobs2df_starttime(string, expected):
    """Verify that when DAYS is given but no DATES, a computed DATE column
    appears if a starttime is supplied."""
    # starttime must be accepted both as an ISO string and as a date object:
    for start in ("2020-01-01", datetime.date(2020, 1, 1)):
        parsed = ertobs2df(string, starttime=start)
        pd.testing.assert_frame_equal(
            parsed.sort_index(axis=1),
            expected.sort_index(axis=1),
        )
def test_ertobs2df(string, expected):
    """Test the full conversion from ERT observation format to a Pandas
    dataframe (this exercises many of the helper functions that are also
    tested individually)."""

    def _colsorted(frame):
        # Column order is irrelevant for equivalence checks.
        return frame.sort_index(axis=1)

    parsed = ertobs2df(string)
    pd.testing.assert_frame_equal(
        _colsorted(parsed), _colsorted(expected), check_dtype=False
    )

    # Round-trip through the ERT observation text format:
    pd.testing.assert_frame_equal(
        _colsorted(ertobs2df(df2ertobs(parsed))), _colsorted(parsed)
    )

    # Round-trip test via yaml (only meaningful when dates are present):
    if "DATE" in expected:
        via_yaml = obsdict2df(df2obsdict(parsed))
        pd.testing.assert_frame_equal(_colsorted(via_yaml), _colsorted(parsed))
def test_roundtrip_ertobs(filename, readonly_testdata_dir):
    """Convert each included test data set into ERT observation text and
    parse it back, asserting that we end up in the same place."""
    dframe = autoparse_file(filename)[1]

    # Convert to ERT obs format and back again:
    ert_roundtrip_dframe = ertobs2df(df2ertobs(dframe))

    ert_roundtrip_dframe.set_index("CLASS", inplace=True)
    dframe.set_index("CLASS", inplace=True)

    def _normalize(frame):
        """Drop all-NaN columns, sort columns, index by whichever identifying
        columns are present, and sort rows."""
        pruned = frame.dropna(axis=1, how="all").sort_index(axis=1)
        pruned.set_index(
            list(
                {"CLASS", "LABEL", "OBS", "SEGMENT"}.intersection(
                    set(pruned.columns)
                )
            ),
            inplace=True,
        )
        pruned.sort_index(inplace=True)
        return pruned

    # This big loop is only here to aid in debugging when the dataframes do
    # not match, asserting equivalence class by class:
    for _class in dframe.index.unique():
        roundtrip_subframe = _normalize(ert_roundtrip_dframe.loc[[_class]])
        subframe = _normalize(dframe.loc[[_class]])

        # Comments are not preservable through ertobs roundtrips:
        subframe.drop(
            ["COMMENT", "SUBCOMMENT"], axis="columns", errors="ignore", inplace=True
        )
        # WELL as used in yaml is not preservable in roundtrips:
        if _class == "BLOCK_OBSERVATION" and "WELL" in subframe:
            del subframe["WELL"]

        pd.testing.assert_frame_equal(
            roundtrip_subframe.sort_index(),
            subframe.sort_index(),
            check_dtype=False,
        )
def test_dfsummary2ertobs(obs_df, expected_str):
    """Verify generation of ERT summary observation text from the internal
    dataframe representation."""
    assert dfsummary2ertobs(obs_df).strip() == expected_str.strip()

    # The generated text should parse back into the SUMMARY_OBSERVATION
    # subset of the dataframe; comments are not attempted parsed:
    obs_df["DATE"] = pd.to_datetime(obs_df["DATE"])
    summary_rows = obs_df[obs_df["CLASS"] == "SUMMARY_OBSERVATION"]
    reference = summary_rows.dropna(axis="columns", how="all").drop(
        "COMMENT", axis=1, errors="ignore"
    )
    pd.testing.assert_frame_equal(
        ertobs2df(expected_str),
        reference,
        # We relax int/float problems as long as the values are equal:
        check_dtype=False,
    )
def autoparse_file(filename):
    """Detects the observation file format for a given filename. This
    is done by attempting to parse its content and giving up on
    exceptions.

    NB: In case of ERT file formats, the include statements are
    interpreted relative to current working directory. Thus it is
    recommended to reparse with correct cwd after detecting ERT file
    format. The correct cwd for include-statement is the path of the ERT
    config file, which is outside the context of fmuobs.

    Args:
        filename (str)

    Returns:
        tuple: First element is a string in [resinsight, csv, yaml, ert],
        or None if no format could be detected. Second element is a
        dataframe (empty on detection failure).
    """
    # Attempt 1: ResInsight observation file (semicolon-separated CSV).
    try:
        dframe = pd.read_csv(filename, sep=";")
        if {"DATE", "VECTOR", "VALUE", "ERROR"}.issubset(
            set(dframe.columns)
        ) and not dframe.empty:
            logger.info("Parsed %s as a ResInsight observation file", filename)
            return ("resinsight", resinsight_df2df(dframe))
    except ValueError:
        pass

    # Attempt 2: the internal dataframe format, serialized as comma-CSV.
    try:
        dframe = pd.read_csv(filename, sep=",")
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            logger.info(
                "Parsed %s as a CSV (internal dataframe format for ertobs) file",
                filename,
            )
            if "DATE" in dframe:
                dframe["DATE"] = pd.to_datetime(dframe["DATE"])
            return ("csv", dframe)
    except ValueError:
        pass

    # Attempt 3: YAML with "smry" and/or "rft" observation sections.
    try:
        with open(filename) as f_handle:
            obsdict = yaml.safe_load(f_handle.read())
        if isinstance(obsdict, dict):
            if obsdict.get("smry", None) or obsdict.get("rft", None):
                logger.info("Parsed %s as a YAML file with observations", filename)
                return ("yaml", obsdict2df(obsdict))
    except yaml.scanner.ScannerError as exception:
        # This occurs if there are tabs in the file, which is not
        # allowed in a YAML file (but it can be present in ERT observation
        # files)
        logger.debug("ScannerError while attempting yaml-parsing")
        logger.debug(str(exception))
    except ValueError:
        pass

    # Attempt 4: ERT observation file.
    try:
        with open(filename) as f_handle:
            # This function does not have information on include file paths.
            # Accept a FileNotFoundError while parsing, if we encounter that
            # it is most likely an ert file, but which needs additional hints
            # on where include files are located.
            try:
                dframe = ertobs2df(f_handle.read())
            except FileNotFoundError:
                logger.info(
                    "Parsed %s as an ERT observation file, with include statements",
                    filename,
                )
                return ("ert", pd.DataFrame())
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            if set(dframe["CLASS"]).intersection(set(CLASS_SHORTNAME.keys())):
                logger.info("Parsed %s as an ERT observation file", filename)
                return ("ert", dframe)
    except ValueError:
        pass

    logger.error(
        "Unable to parse %s as any supported observation file format", filename
    )
    # BUGFIX: return an empty DataFrame *instance*; the original returned
    # the ``pd.DataFrame`` class itself, which breaks callers that do
    # e.g. ``dframe.empty`` on the result.
    return (None, pd.DataFrame())