def test_manual_aggregation():
    """Test that a manually combined mean equals ``agg("mean")``.

    The five realizations of the Reek test ensemble are added together
    and scaled by one fifth using arithmetic operators (the original
    docstring refers to this as a RealizationCombination). The RMS_SEED
    parameter of that result must equal the RMS_SEED parameter of the
    virtual realization returned by calling agg("mean") on the ensemble.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")

    # Aggregate an ensemble into a virtual "mean" realization
    mean = reekensemble.agg("mean")

    # Combine the ensemble members directly into a mean computation.
    # Also returns a virtual realization.
    #
    # The scaling factor is written with a float literal on purpose:
    # "1 / 5" is floor division (== 0) on Python 2 unless
    # "from __future__ import division" is in effect, which would zero
    # out the whole combination. "1.0 / 5" is 0.2 on every interpreter.
    manualmean = (1.0 / 5 *
                  (reekensemble[0] + reekensemble[1] + reekensemble[2] +
                   reekensemble[3] + reekensemble[4]))

    # Both ways of computing the mean must agree on the parameter value
    assert mean["parameters"]["RMS_SEED"] == manualmean["parameters"][
        "RMS_SEED"]
# Example #2
# 0
def test_virtual_observations():
    """Rank realizations against observations made from virtual realizations.

    For each of the p90, mean and p10 aggregations of the ensemble, an
    Observations object is loaded from the aggregated FOPT summary
    vector. The realization with the smallest summed L2 mismatch against
    those observations is recorded and compared with the expected
    realization index.
    """
    # Locate the test data. When __file__ is not defined (e.g. the code
    # was pasted into an interactive session) the current directory is used.
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    # The ensemble to rank realizations from
    realization_glob = (testdir + "/data/testensemble-reek001/" +
                        "realization-*/iter-0/")
    ens = ScratchEnsemble("test", realization_glob)
    ens.load_smry(time_index="yearly",
                  column_keys=["FOPT", "FGPT", "FWPT", "FWCT", "FGOR"])

    # The virtual realizations acting as "truth", one per aggregation
    aggregations = (
        ("p90realization", "p90"),
        ("meanrealization", "mean"),
        ("p10realization", "p10"),
    )
    virtreals = {}
    for label, method in aggregations:
        virtreals[label] = ens.agg(method)

    summaryvector = "FOPT"
    representative_realizations = {}
    for label, virtreal in six.iteritems(virtreals):
        # Start from an empty observation set and fill it from the
        # virtual realization's summary vector
        obs = Observations({})
        obs.load_smry(virtreal, summaryvector, time_index="yearly")

        # Mismatch of every realization in the ensemble against the
        # observations, summed per realization
        mismatch = obs.mismatch(ens)
        l2_by_real = mismatch.groupby("REAL").sum()["L2"]

        # The realization with the smallest L2 sum is the closest one
        ranked = l2_by_real.sort_values()
        representative_realizations[label] = ranked.index.values[0]

    expected = (
        ("meanrealization", 4),
        ("p90realization", 2),
        ("p10realization", 1),
    )
    for label, realindex in expected:
        assert representative_realizations[label] == realindex
def test_ensemble_aggregations(tmpdir):
    """Test agg() on an ensemble for a range of aggregation methods.

    Mean, median, min, max, p10 and p90 virtual realizations are
    computed from the Reek test ensemble, some are dumped to disk, and
    the ordering between the aggregated values is checked. The
    excludekeys/keylist arguments of agg() and the error raised for an
    unknown aggregation name are also checked.

    Args:
        tmpdir: object with a chdir() method (presumably the pytest
            tmpdir fixture); virtual realizations are written below it.
    """
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        # Makes it easier to paste this code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))

    realization_glob = (testdir + "/data/testensemble-reek001/" +
                        "realization-*/iter-0")
    reekensemble = ScratchEnsemble("reektest", realization_glob)
    for resolution in ("monthly", "yearly"):
        reekensemble.load_smry(time_index=resolution, column_keys=["F*"])
    reekensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")
    reekensemble.load_scalar("npv.txt", convert_numeric=True)

    # One virtual realization per aggregation method
    # (p10 is the low estimate, p90 the high estimate)
    stats = {}
    for method in ("mean", "median", "min", "max", "p10", "p90"):
        stats[method] = reekensemble.agg(method)

    # Dump some of them to disk, inside the temporary directory
    tmpdir.chdir()
    dumps = (
        ("min", "virtreal_min"),
        ("max", "virtreal_max"),
        ("mean", "virtreal_mean"),
    )
    for method, dirname in dumps:
        stats[method].to_disk(dirname, delete=True)

    def rms_seed(method):
        """Look up the aggregated RMS_SEED parameter for a method"""
        return stats[method]["parameters.txt"]["RMS_SEED"]

    def volumes(method):
        """Look up the aggregated volume table for a method"""
        return stats[method]["simulator_volume_fipnum"]

    # Parameters: strict ordering for the extremes, then non-strict
    # ordering along min <= p10 <= median <= p90 <= max, and for the mean
    assert rms_seed("min") < rms_seed("max")
    ordered_pairs = (
        ("min", "p10"),
        ("p10", "median"),
        ("median", "p90"),
        ("p90", "max"),
        ("min", "mean"),
        ("min", "max"),
    )
    for lower, upper in ordered_pairs:
        assert rms_seed(lower) <= rms_seed(upper)

    # Summary data: the last monthly FOPT value
    lowest_final_fopt = stats["min"]["unsmry--monthly"]["FOPT"].iloc[-1]
    highest_final_fopt = stats["max"]["unsmry--monthly"]["FOPT"].iloc[-1]
    assert lowest_final_fopt < highest_final_fopt

    # Volume table: row 2 (said to correspond to FIPNUM=3). The first
    # check is positional (.iloc), the second is label-based (.loc).
    assert (volumes("min").iloc[2]["STOIIP_OIL"] <
            volumes("mean").iloc[2]["STOIIP_OIL"])
    assert (volumes("mean").loc[2]["STOIIP_OIL"] <
            volumes("max").loc[2]["STOIIP_OIL"])

    # STATUS is aggregated as well. Since min and max also work for
    # string columns, the available data varies with aggregation method.
    # Job 49 is the Eclipse forward model.
    eclipse_job = 49
    p10_duration = stats["p10"]["STATUS"].iloc[eclipse_job]["DURATION"]
    max_duration = stats["max"]["STATUS"].iloc[eclipse_job]["DURATION"]
    assert p10_duration < max_duration

    # The scalar loaded from npv.txt is aggregated too
    meanreal = stats["mean"]
    assert "npv.txt" in meanreal.keys()
    assert meanreal["npv.txt"] == 3382.5

    # STATUS can be left out, either by excluding it or by asking
    # for something else only
    without_status = reekensemble.agg("mean", excludekeys="STATUS")
    assert "STATUS" not in without_status.keys()
    only_parameters = reekensemble.agg("mean", keylist=["parameters.txt"])
    assert "STATUS" not in only_parameters.keys()

    # Arbitrary percentiles
    p01_seed = reekensemble.agg("p01")["parameters"]["RMS_SEED"]
    p99_seed = reekensemble.agg("p99")["parameters"]["RMS_SEED"]
    assert p01_seed < p99_seed

    # Unknown aggregation names are rejected
    with pytest.raises(ValueError):
        reekensemble.agg("foobar")

    # Include/exclude accept both a single string and a list of strings
    for selection in (
            {"excludekeys": "parameters.txt"},
            {"excludekeys": ["parameters.txt"]},
            {"keylist": "STATUS"},
            {"keylist": ["STATUS"]},
    ):
        aggregated = reekensemble.agg("mean", **selection)
        assert "parameters.txt" not in aggregated.keys()

    # Keys to include may be given in shorthand form, but the returned
    # keys are the fully qualified paths
    for shorthand in ("unsmry--yearly", ["unsmry--yearly"]):
        aggregated = reekensemble.agg("mean", keylist=shorthand)
        assert "share/results/tables/unsmry--yearly.csv" in aggregated.keys()

    yearly_only = reekensemble.agg("mean", keylist="unsmry--yearly")
    assert isinstance(yearly_only.get_df("unsmry--yearly"), pd.DataFrame)
# Example #4
# 0
def test_filter():
    """Test filtering of realizations in ensembles

    Realizations not fulfilling tested conditions are
    dropped from the ensemble.

    The test covers filtering on the presence of a data key, on
    key/value pairs in parameters.txt (with both string and numeric
    values), on column presence in tables, and on values contained in a
    table column (numbers and dates). It also checks that unsupported
    argument combinations raise ValueError.

    NOTE(review): several filter() calls below do not pass ``inplace``.
    Whether those calls modify ``reekensemble`` depends on the default of
    filter(), which is not visible here, so the statement order should
    be left untouched.
    """

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    # Glob pattern for the realizations; reused every time a fresh
    # ensemble is constructed further down.
    dirs = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", dirs)

    # This should just require a STATUS file to be there
    # for every realization
    assert len(reekensemble.filter("STATUS")) == 5

    # Test string equivalence on numeric data:
    reekensemble.filter("parameters.txt",
                        key="RMS_SEED",
                        value="723121249",
                        inplace=True)
    # The in-place filter has shrunk the ensemble itself
    assert len(reekensemble) == 2

    # (False positive from pylint on this line)
    assert reekensemble.agg("mean")["parameters"]["RMS_SEED"] == 723121249

    # Test numeric equivalence
    # (on a fresh ensemble, since the previous one was filtered in place)
    reekensemble = ScratchEnsemble("reektest", dirs)
    reekensemble.filter("parameters.txt",
                        key="RMS_SEED",
                        value=723121249,
                        inplace=True)
    assert len(reekensemble) == 2
    assert reekensemble.agg("mean")["parameters"]["RMS_SEED"] == 723121249

    # Fresh ensemble again; from here on the parameters.txt filters
    # use inplace=False and only the returned object is inspected.
    reekensemble = ScratchEnsemble("reektest", dirs)
    filtered = reekensemble.filter("parameters.txt", key="FOO", inplace=False)
    assert len(filtered) == 2
    # (NaN in one of the parameters.txt is True in this context)

    filtered = reekensemble.filter("parameters.txt",
                                   key="MULTFLT_F1",
                                   value=0.001,
                                   inplace=False)
    assert len(filtered) == 4
    # Numeric and string forms of the same value give the same result
    assert (len(
        reekensemble.filter("parameters.txt",
                            key="FWL",
                            value=1700,
                            inplace=False)) == 3)
    assert (len(
        reekensemble.filter("parameters.txt",
                            key="FWL",
                            value="1700",
                            inplace=False)) == 3)

    # This one is tricky, the empty string should correspond to
    # missing data - NOT IMPLEMENTED
    # assert len(reekensemble.filter('parameters.txt', key='FOO',
    #                               value='', inplace=False) == 4)

    # while no value means that the key must be present
    assert len(reekensemble.filter("parameters.txt", key="FOO",
                                   inplace=False)) == 2

    # 'key' is not accepted for things that are tables.
    # (and, as the second check shows, neither is 'value' on its own)
    with pytest.raises(ValueError):
        reekensemble.filter("STATUS", key="ECLIPSE")
    with pytest.raises(ValueError):
        reekensemble.filter("STATUS", value="ECLIPSE")

    # Check column presence
    assert len(reekensemble.filter("STATUS", column="FORWARD_MODEL")) == 5
    assert (len(
        reekensemble.filter("STATUS", column="FORWARD_MODEL",
                            inplace=False)) == 5)
    # A column that does not exist leaves a falsy (empty) result
    assert not reekensemble.filter("STATUS", column="FOOBAR", inplace=False)
    # Unknown keyword arguments are rejected
    with pytest.raises(ValueError):
        reekensemble.filter("STATUS", wrongarg="FOOBAR", inplace=False)
    # Check that a column contains a given string value
    assert (len(
        reekensemble.filter("STATUS",
                            column="FORWARD_MODEL",
                            columncontains="ECLIPSE100_2014.2")) == 5)
    assert not reekensemble.filter(
        "STATUS",
        column="FORWARD_MODEL",
        columncontains="ECLIPSE100_2010.2",
        inplace=False,
    )
    # Load summary data with default arguments; the filters below
    # address it through the key "unsmry--raw".
    reekensemble.load_smry()
    assert len(reekensemble.filter("unsmry--raw")) == 5
    assert len(reekensemble.filter("unsmry--raw", column="FOPT")) == 5
    assert not reekensemble.filter(
        "unsmry--raw", column="FOOBAR", inplace=False)
    # Numeric columncontains: 0 is matched in all five realizations,
    # -1000 in none of them
    assert len(
        reekensemble.filter("unsmry--raw", column="FOPT",
                            columncontains=0)) == 5
    assert not reekensemble.filter(
        "unsmry--raw", column="FOPT", columncontains=-1000, inplace=False)
    # A specific FOPT value, given as float and as int, matches
    # exactly one realization
    assert (len(
        reekensemble.filter("unsmry--raw",
                            column="FOPT",
                            columncontains=6025523.0,
                            inplace=False)) == 1)
    assert (len(
        reekensemble.filter("unsmry--raw",
                            column="FOPT",
                            columncontains=6025523,
                            inplace=False)) == 1)

    # We do not support strings here (not yet)
    # assert len(reekensemble.filter('unsmry--raw', column='FOPT',
    #                                columncontains='6025523.0',
    #                                inplace=False)) == 1

    # Dates given as strings: a date without a time matches the same
    # realizations as the date with time 00:00:00, while one second
    # later matches nothing
    assert (len(
        reekensemble.filter("unsmry--raw",
                            column="DATE",
                            columncontains="2002-11-25",
                            inplace=False)) == 5)
    assert (len(
        reekensemble.filter(
            "unsmry--raw",
            column="DATE",
            columncontains="2002-11-25 00:00:00",
            inplace=False,
        )) == 5)
    assert not reekensemble.filter(
        "unsmry--raw",
        column="DATE",
        columncontains="2002-11-25 00:00:01",
        inplace=False,
    )
    # A timestamp with a time-of-day component is found in three
    # realizations, but the bare date for the same day matches nothing
    assert (len(
        reekensemble.filter(
            "unsmry--raw",
            column="DATE",
            columncontains="2000-01-07 02:26:15",
            inplace=False,
        )) == 3)
    assert not reekensemble.filter("unsmry--raw",
                                   column="DATE",
                                   columncontains="2000-01-07",
                                   inplace=False)