Esempio n. 1
0
def test_roundtrip_resinsight(filename, readonly_testdata_dir):
    """Round-trip each test data set through the ResInsight dataframe format.

    Only SUMMARY_OBSERVATION rows take part in the comparison, since that
    is the only observation class ResInsight supports.
    """
    # LABEL is not part of the ResInsight format, so a made-up label comes
    # back when importing; it is left out of the comparison together with
    # the COMMENT and SUBCOMMENT columns.
    ignored_columns = ["LABEL", "COMMENT", "SUBCOMMENT"]

    parsed = autoparse_file(filename)[1]

    # Keep only the observation class that ResInsight can represent, and
    # discard columns that become entirely empty as a result.
    is_summary = parsed["CLASS"] == "SUMMARY_OBSERVATION"
    expected = parsed[is_summary].dropna(axis="columns", how="all")

    # Observations without a date are dropped as well.
    has_date = expected["DATE"].notna()
    expected = expected[has_date].dropna(axis=1, how="all")

    # Into the ResInsight dataframe layout, and back again.
    roundtripped = resinsight_df2df(df2resinsight_df(expected))

    def comparable(frame):
        """Return frame with sorted columns and the ignored columns removed."""
        return frame.sort_index(axis="columns").drop(
            ignored_columns, axis="columns", errors="ignore"
        )

    pd.testing.assert_frame_equal(
        comparable(roundtripped),
        comparable(expected),
        check_like=True,
    )
Esempio n. 2
0
def autoparse_file(filename):
    """Detect the observation file format of a file by trial parsing.

    The supported formats are tried in a fixed order (ResInsight, CSV,
    YAML, ERT). An attempt is abandoned when parsing raises an exception
    or when the parsed content does not look like that format, and the
    next format is tried.

    NB: In case of ERT file formats, the include statements are
    interpreted relative to current working directory. Thus it
    is recommended to reparse with correct cwd after detecting ERT file
    format. The correct cwd for include-statement is the path of the
    ERT config file, which is outside the context of fmuobs.

    Args:
        filename (str): Path to the file to inspect.

    Returns:
        tuple: First element is a string in [resinsight, csv, yaml, ert],
        or None if no format could be detected. Second element is the
        parsed observations as a dataframe (for yaml input, the parsed
        dict is passed through obsdict2df before it is returned). The
        dataframe is empty when an ERT file has include statements that
        could not be resolved, and when no format was detected.
    """
    # Attempt 1: ResInsight, a semicolon-separated table with a known
    # set of columns.
    try:
        dframe = pd.read_csv(filename, sep=";")
        if {"DATE", "VECTOR", "VALUE", "ERROR"}.issubset(
            dframe.columns
        ) and not dframe.empty:
            logger.info("Parsed %s as a ResInsight observation file", filename)
            return ("resinsight", resinsight_df2df(dframe))
    except ValueError:
        pass

    # Attempt 2: CSV, the comma-separated internal dataframe format.
    try:
        dframe = pd.read_csv(filename, sep=",")
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            logger.info(
                "Parsed %s as a CSV (internal dataframe format for ertobs) file",
                filename,
            )
            if "DATE" in dframe:
                dframe["DATE"] = pd.to_datetime(dframe["DATE"])
            return ("csv", dframe)
    except ValueError:
        pass

    # Attempt 3: YAML, which must be a dict with a non-empty "smry" or
    # "rft" entry.
    try:
        with open(filename) as f_handle:
            obsdict = yaml.safe_load(f_handle.read())
        if isinstance(obsdict, dict):
            if obsdict.get("smry", None) or obsdict.get("rft", None):
                logger.info("Parsed %s as a YAML file with observations", filename)
                return ("yaml", obsdict2df(obsdict))
    except yaml.scanner.ScannerError as exception:
        # This occurs if there are tabs in the file, which is not
        # allowed in a YAML file (but it can be present in ERT observation files)
        logger.debug("ScannerError while attempting yaml-parsing")
        logger.debug(str(exception))
    except yaml.YAMLError as exception:
        # Any other YAML syntax problem also just means that this is not a
        # YAML file; move on to the next format instead of raising.
        logger.debug("YAMLError while attempting yaml-parsing")
        logger.debug(str(exception))
    except ValueError:
        pass

    # Attempt 4: ERT observation file.
    try:
        with open(filename) as f_handle:
            # This function does not have information on include file paths.
            # Accept a FileNotFoundError while parsing, if we encounter that
            # it is most likely an ert file, but which needs additional hints
            # on where include files are located.
            try:
                dframe = ertobs2df(f_handle.read())
            except FileNotFoundError:
                logger.info(
                    "Parsed %s as an ERT observation file, with include statements",
                    filename,
                )
                return ("ert", pd.DataFrame())
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            if set(dframe["CLASS"]).intersection(set(CLASS_SHORTNAME.keys())):
                logger.info("Parsed %s as an ERT observation file", filename)
                return ("ert", dframe)
    except ValueError:
        pass

    logger.error(
        "Unable to parse %s as any supported observation file format", filename
    )
    # Return an empty dataframe instance (not the DataFrame class), so that
    # callers can treat the second element uniformly as a dataframe.
    return (None, pd.DataFrame())