Example 1
def test_roundtrip_yaml(filename, readonly_testdata_dir):
    """Test converting all test data sets in testdir into yaml and back again.

    Because YAML supports only a subset of the features in the internal dataframe
    format, some exceptions must be hardcoded in this test function.

    Also pay attention to the way the yaml parser creates LABEL data.
    """
    dframe = autoparse_file(filename)[1]

    # Reduce to the subset supported by yaml:
    dframe = dframe[
        (dframe["CLASS"] == "SUMMARY_OBSERVATION")
        | (dframe["CLASS"] == "BLOCK_OBSERVATION")
    ].dropna(axis="columns", how="all")
    # Convert to YAML (really dict) format and back again:
    obsdict = df2obsdict(dframe)
    yaml_roundtrip_dframe = obsdict2df(obsdict)
    yaml_roundtrip_dframe.set_index("CLASS", inplace=True)
    dframe.set_index("CLASS", inplace=True)
    if "WELL" in yaml_roundtrip_dframe:
        # WELL as used in yaml is not preservable in roundtrips
        del yaml_roundtrip_dframe["WELL"]
    if "WELL" in dframe:
        del dframe["WELL"]
    # print(yaml_roundtrip_dframe)
    # print(dframe)
    pd.testing.assert_frame_equal(
        yaml_roundtrip_dframe.sort_index(axis="columns").sort_values("LABEL"),
        dframe.sort_index(axis="columns").sort_values("LABEL"),
        check_like=True,
    )
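
The roundtrip above depends on the dict layout that df2obsdict produces and obsdict2df consumes. The sketch below illustrates that layout under stated assumptions: only the top-level "smry"/"rft" keys are confirmed by the autoparse_file snippet further down, while the nested field names and all example values are hypothetical.

# Assumed observation-dict layout (illustrative only; the field names below the
# top-level "smry" key and every value shown are hypothetical):
assumed_obsdict = {
    "smry": [
        {
            "key": "WOPR:OP_1",  # hypothetical summary vector
            "observations": [
                {"date": "2001-01-01", "value": 100.0, "error": 5.0},
            ],
        }
    ],
}

Note that no LABEL field appears in this sketch; as the docstring above warns, the yaml parser creates LABEL data itself.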
Example 2
def test_obsdict2df(obsdict, expected_df):
    """Test converting yaml format (any kind of observation) into internal
    dataframe format. Specifics in each class of observation has its own test
    functions"""
    if "DATE" in expected_df:
        expected_df["DATE"] = pd.to_datetime(expected_df["DATE"])
    pd.testing.assert_frame_equal(
        obsdict2df(obsdict).sort_index(axis=1),
        expected_df.sort_index(axis=1),
        check_dtype=False,
    )
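
Both this test and the ones above rely on the same comparison idiom: columns are put in a deterministic order with sort_index(axis=1) before pandas' frame comparison, and check_dtype=False tolerates dtype drift (e.g. int versus float) introduced by the conversions. A self-contained toy example of that idiom, using made-up data rather than anything from the test suite:

import pandas as pd

# Same content, different column order and dtypes:
left = pd.DataFrame({"VALUE": [100.0], "ERROR": [5.0]})
right = pd.DataFrame({"ERROR": [5.0], "VALUE": [100]})

# Passes once the columns are sorted and dtype checking is relaxed:
pd.testing.assert_frame_equal(
    left.sort_index(axis=1), right.sort_index(axis=1), check_dtype=False
)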
Example 3
def test_ertobs2df(string, expected):
    """Test converting all the way from ERT observation format to a Pandas
    Dataframe works as expected (this includes many of the other functions
    that are also tested individually)"""
    dframe = ertobs2df(string)
    pd.testing.assert_frame_equal(dframe.sort_index(axis=1),
                                  expected.sort_index(axis=1),
                                  check_dtype=False)

    pd.testing.assert_frame_equal(
        ertobs2df(df2ertobs(dframe)).sort_index(axis=1),
        dframe.sort_index(axis=1))

    # Round-trip test via yaml:
    if "DATE" not in expected:
        return
    round_trip_yaml_dframe = obsdict2df(df2obsdict(dframe))
    pd.testing.assert_frame_equal(round_trip_yaml_dframe.sort_index(axis=1),
                                  dframe.sort_index(axis=1))
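
For context, the ERT observation text this test starts from looks roughly like the classical SUMMARY_OBSERVATION block sketched below. The block is hand-written for illustration (the label, KEY and DATE values are made up), and ertobs2df/df2ertobs are assumed to be imported the same way as in the test above, since these snippets do not show their module paths.

import pandas as pd

# Hand-written, illustrative ERT observation string (all values are assumptions):
ERT_OBS_STRING = """
SUMMARY_OBSERVATION SEP_TEST_2005
{
    VALUE = 100;
    ERROR = 5;
    DATE  = 21/08/2005;
    KEY   = FOPR;
};
"""

# With ertobs2df/df2ertobs available as in the test above, the
# text -> dataframe -> text -> dataframe roundtrip should be the identity:
dframe = ertobs2df(ERT_OBS_STRING)
pd.testing.assert_frame_equal(
    ertobs2df(df2ertobs(dframe)).sort_index(axis=1),
    dframe.sort_index(axis=1),
)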
Example 4
def autoparse_file(filename):
    """Detects the observation file format for a given filename. This
    is done by attempting to parse its content and giving up on
    exceptions.

    NB: For the ERT file format, include statements are interpreted
    relative to the current working directory. It is therefore recommended
    to reparse with the correct cwd after the ERT format has been detected.
    The correct cwd for include statements is the directory of the ERT
    config file, which is outside the context of fmuobs.

    Args:
        filename (str): Path to the observation file to be parsed.

    Returns:
        tuple: The first element is one of "resinsight", "csv", "yaml" or "ert"
        (or None if the format could not be detected), the second element is the
        parsed observations as a dataframe (empty if parsing could not complete).
    """
    try:
        dframe = pd.read_csv(filename, sep=";")
        if {"DATE", "VECTOR", "VALUE", "ERROR"}.issubset(
            set(dframe.columns)
        ) and not dframe.empty:
            logger.info("Parsed %s as a ResInsight observation file", filename)
            return ("resinsight", resinsight_df2df(dframe))
    except ValueError:
        pass

    try:
        dframe = pd.read_csv(filename, sep=",")
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            logger.info(
                "Parsed %s as a CSV (internal dataframe format for ertobs) file",
                filename,
            )
            if "DATE" in dframe:
                dframe["DATE"] = pd.to_datetime(dframe["DATE"])
            return ("csv", dframe)
    except ValueError:
        pass

    try:
        with open(filename) as f_handle:
            obsdict = yaml.safe_load(f_handle.read())
        if isinstance(obsdict, dict):
            if obsdict.get("smry", None) or obsdict.get("rft", None):
                logger.info("Parsed %s as a YAML file with observations", filename)
                return ("yaml", obsdict2df(obsdict))
    except yaml.scanner.ScannerError as exception:
        # This occurs if there are tabs in the file, which is not
        # allowed in a YAML file (but it can be present in ERT observation files)
        logger.debug("ScannerError while attempting yaml-parsing")
        logger.debug(str(exception))
    except ValueError:
        pass

    try:
        with open(filename) as f_handle:
            # This function does not have information on include file paths.
            # Accept a FileNotFoundError while parsing, if we encounter that
            # it is most likely an ert file, but which needs additional hints
            # on where include files are located.
            try:
                dframe = ertobs2df(f_handle.read())
            except FileNotFoundError:
                logger.info(
                    "Parsed %s as an ERT observation file, with include statements",
                    filename,
                )
                return ("ert", pd.DataFrame())
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            if set(dframe["CLASS"]).intersection(set(CLASS_SHORTNAME.keys())):
                logger.info("Parsed %s as an ERT observation file", filename)
                return ("ert", dframe)
    except ValueError:
        pass

    logger.error(
        "Unable to parse %s as any supported observation file format", filename
    )
    return (None, pd.DataFrame())
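
A minimal usage sketch of autoparse_file follows; the filename is hypothetical, and the empty-dataframe ERT branch follows the docstring's advice about reparsing with the correct cwd.

# Hypothetical usage; "observations.txt" is not a real file in the project.
filetype, dframe = autoparse_file("observations.txt")
if filetype is None:
    raise SystemExit("Could not parse the observation file")
if filetype == "ert" and dframe.empty:
    # ERT format with include statements detected: reparse after changing the
    # working directory to the directory of the ERT config file, as the
    # docstring above recommends.
    pass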