# Example 1
def fmuobs(
    inputfile: str,
    ertobs: Optional[str] = None,
    yml: Optional[str] = None,
    resinsight: Optional[str] = None,
    csv: Optional[str] = None,
    verbose: bool = False,
    debug: bool = False,
    starttime: Optional[str] = None,
    includedir: Optional[str] = None,
):
    # pylint: disable=too-many-arguments
    """Alternative to main() with named arguments.

    Parses the observation file, optionally re-parses ERT files with a
    corrected include-directory, validates the resulting dataframe and
    dumps it to the requested output formats.

    Args:
        inputfile: Observation file in any supported format.
        ertobs: Output filename for ERT observation format (or magic stdout).
        yml: Output filename for YAML format (or magic stdout).
        resinsight: Output filename for ResInsight CSV format.
        csv: Output filename for the internal dataframe CSV format.
        verbose: Enable INFO-level logging (not allowed with stdout output).
        debug: Enable DEBUG-level logging (not allowed with stdout output).
        starttime: Reference date used to compute DATE from DAYS.
        includedir: Directory that ERT include statements are relative to.
    """
    if verbose or debug:
        if __MAGIC_STDOUT__ in (csv, yml, ertobs):
            raise SystemExit("Don't use verbose/debug when writing to stdout")
        loglevel = logging.DEBUG if debug else logging.INFO
        logger.setLevel(loglevel)
        getLogger("subscript.fmuobs.parsers").setLevel(loglevel)
        getLogger("subscript.fmuobs.writers").setLevel(loglevel)
        getLogger("subscript.fmuobs.util").setLevel(loglevel)

    (filetype, dframe) = autoparse_file(inputfile)

    # For ERT files, there is the problem of include-file-path. If not-found
    # include filepaths are present, the filetype is ert, but dframe is empty.
    # BUG FIX: the original tested ``pd.DataFrame.empty`` — the property
    # object on the *class*, which is always truthy — so this branch ran for
    # every ert file. Test the actual parsed dataframe instead.
    if filetype == "ert" and dframe.empty:
        with open(inputfile) as f_handle:
            input_str = f_handle.read()
        if not includedir:
            # Try and error for the location of include files, first in current
            # dir, then in the directory of the input file. The proper default
            # for cwd is the location of the ert config file, which is not
            # available in this parser, and must be supplied on command line.
            try:
                dframe = ertobs2df(input_str, cwd=".", starttime=starttime)
            except FileNotFoundError:
                dframe = ertobs2df(
                    input_str,
                    cwd=os.path.dirname(inputfile),
                    starttime=starttime,
                )
        else:
            # Pass starttime here as well, consistent with the branch above.
            dframe = ertobs2df(input_str, cwd=includedir, starttime=starttime)

    if starttime:
        dframe = compute_date_from_days(dframe)

    if not validate_internal_dframe(dframe):
        logger.error("Observation dataframe is invalid!")

    dump_results(dframe, csv, yml, resinsight, ertobs)
def test_ertobs2df_starttime(string, expected):
    """Verify that a computed DATE column appears when DAYS is present
    without DATES, as long as a starttime is supplied.

    The starttime is accepted both as an ISO string and as a date object.
    """
    for start in ("2020-01-01", datetime.date(2020, 1, 1)):
        pd.testing.assert_frame_equal(
            ertobs2df(string, starttime=start).sort_index(axis=1),
            expected.sort_index(axis=1),
        )
def test_ertobs2df(string, expected):
    """Check the full conversion from ERT observation text to the internal
    Pandas dataframe representation (exercising many of the individually
    tested helper functions along the way)."""

    def columns_sorted(frame):
        # Column order is not part of the contract; compare order-insensitively.
        return frame.sort_index(axis=1)

    parsed = ertobs2df(string)
    pd.testing.assert_frame_equal(
        columns_sorted(parsed), columns_sorted(expected), check_dtype=False
    )

    # Round-trip through the ERT observation text format:
    pd.testing.assert_frame_equal(
        columns_sorted(ertobs2df(df2ertobs(parsed))), columns_sorted(parsed)
    )

    # Round-trip test via yaml:
    if "DATE" not in expected:
        return
    yaml_roundtrip = obsdict2df(df2obsdict(parsed))
    pd.testing.assert_frame_equal(
        columns_sorted(yaml_roundtrip), columns_sorted(parsed)
    )
# Example 4
def test_roundtrip_ertobs(filename, readonly_testdata_dir):
    """Convert every included test data set into ERT observation text and
    parse it back, asserting that we end up with the data we started from."""
    dframe = autoparse_file(filename)[1]

    # Convert to ERT obs format and back again:
    roundtripped = ertobs2df(df2ertobs(dframe)).set_index("CLASS")
    dframe = dframe.set_index("CLASS")

    # Columns that together identify an observation row:
    key_candidates = {"CLASS", "LABEL", "OBS", "SEGMENT"}

    # Compare class by class; this only aids debugging when the full
    # dataframes diverge, by pinpointing the mismatching subframe.
    for obs_class in dframe.index.unique():
        rt_sub = (
            roundtripped.loc[[obs_class]]
            .dropna(axis=1, how="all")
            .sort_index(axis=1)
        )
        sub = dframe.loc[[obs_class]].dropna(axis=1, how="all").sort_index(axis=1)

        rt_sub = rt_sub.set_index(
            list(key_candidates.intersection(set(rt_sub.columns)))
        ).sort_index()
        sub = sub.set_index(
            list(key_candidates.intersection(set(sub.columns)))
        ).sort_index()

        # Comments are not preservable through ertobs roundtrips:
        sub = sub.drop(["COMMENT", "SUBCOMMENT"], axis="columns", errors="ignore")
        if obs_class == "BLOCK_OBSERVATION" and "WELL" in sub:
            # WELL as used in yaml is not preservable in roundtrips
            sub = sub.drop(columns=["WELL"])

        pd.testing.assert_frame_equal(
            rt_sub.sort_index(),
            sub.sort_index(),
            check_dtype=False,
        )
# Example 5
def test_dfsummary2ertobs(obs_df, expected_str):
    """Verify generation of ERT summary observation text from the internal
    dataframe representation, and that the text parses back to the summary
    subset of the input (comments are not attempted parsed)."""
    assert dfsummary2ertobs(obs_df).strip() == expected_str.strip()

    obs_df["DATE"] = pd.to_datetime(obs_df["DATE"])
    summary_rows = obs_df[obs_df["CLASS"] == "SUMMARY_OBSERVATION"]
    summary_rows = summary_rows.dropna(axis="columns", how="all")
    summary_rows = summary_rows.drop("COMMENT", axis=1, errors="ignore")
    pd.testing.assert_frame_equal(
        ertobs2df(expected_str),
        summary_rows,
        # We relax int/float problems as long as the values are equal:
        check_dtype=False,
    )
# Example 6
def autoparse_file(filename):
    """Detects the observation file format for a given filename. This
    is done by attempting to parse its content and giving up on
    exceptions.

    NB: In case of ERT file formats, the include statements are
    interpreted relative to current working directory. Thus it
    is recommended to reparse with correct cwd after detecting ERT file
    format. The correct cwd for include-statement is the path of the
    ERT config file, which is outside the context of fmuobs.

    Args:
        filename (str)

    Returns:
        tuple: First element is a string in [resinsight, csv, yaml, ert],
        or None if no format could be detected. Second element is a
        dataframe (empty when detection failed or when an ERT file needs
        include-path hints).
    """
    # Candidate 1: ResInsight semicolon-separated observation file.
    try:
        dframe = pd.read_csv(filename, sep=";")
        if {"DATE", "VECTOR", "VALUE", "ERROR"}.issubset(
            set(dframe.columns)
        ) and not dframe.empty:
            logger.info("Parsed %s as a ResInsight observation file", filename)
            return ("resinsight", resinsight_df2df(dframe))
    except ValueError:
        pass

    # Candidate 2: the internal dataframe format dumped as comma-separated CSV.
    try:
        dframe = pd.read_csv(filename, sep=",")
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            logger.info(
                "Parsed %s as a CSV (internal dataframe format for ertobs) file",
                filename,
            )
            if "DATE" in dframe:
                dframe["DATE"] = pd.to_datetime(dframe["DATE"])
            return ("csv", dframe)
    except ValueError:
        pass

    # Candidate 3: YAML observation file (must hold "smry" and/or "rft" keys).
    try:
        with open(filename) as f_handle:
            obsdict = yaml.safe_load(f_handle.read())
        if isinstance(obsdict, dict):
            if obsdict.get("smry", None) or obsdict.get("rft", None):
                logger.info("Parsed %s as a YAML file with observations", filename)
                return ("yaml", obsdict2df(obsdict))
    except yaml.scanner.ScannerError as exception:
        # This occurs if there are tabs in the file, which is not
        # allowed in a YAML file (but it can be present in ERT observation files)
        logger.debug("ScannerError while attempting yaml-parsing")
        logger.debug(str(exception))
    except ValueError:
        pass

    # Candidate 4: native ERT observation file.
    try:
        with open(filename) as f_handle:
            # This function does not have information on include file paths.
            # Accept a FileNotFoundError while parsing, if we encounter that
            # it is most likely an ert file, but which needs additional hints
            # on where include files are located.
            try:
                dframe = ertobs2df(f_handle.read())
            except FileNotFoundError:
                logger.info(
                    "Parsed %s as an ERT observation file, with include statements",
                    filename,
                )
                return ("ert", pd.DataFrame())
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            if set(dframe["CLASS"]).intersection(set(CLASS_SHORTNAME.keys())):
                logger.info("Parsed %s as an ERT observation file", filename)
                return ("ert", dframe)
    except ValueError:
        pass

    logger.error(
        "Unable to parse %s as any supported observation file format", filename
    )
    # BUG FIX: the original returned the class object ``pd.DataFrame`` (missing
    # call parentheses); return an empty instance, consistent with the
    # include-statement branch above and the documented return contract.
    return (None, pd.DataFrame())