Example #1
0
def _load_measured_record(facade, obs_keys):
    """Load *obs_keys* into a MeasuredData object and apply the standard
    filtering pipeline: drop failed realizations and inactive observations,
    then filter by the facade's alpha and std-cutoff settings."""
    record = MeasuredData(facade, obs_keys)
    record.remove_failed_realizations()
    record.remove_inactive_observations()
    record.filter_ensemble_mean_obs(facade.get_alpha())
    record.filter_ensemble_std(facade.get_std_cutoff())
    return record
Example #2
0
def test_history_obs(monkeypatch, facade_snake_oil):
    """After removing inactive observations, the FOPR history observation
    exposes data indices 0..198."""
    measured = MeasuredData(facade_snake_oil, ["FOPR"])
    measured.remove_inactive_observations()

    indices = measured.data.columns.get_level_values("data_index").values
    assert all(indices == list(range(199)))
Example #3
0
def test_summary_obs(monkeypatch, facade_snake_oil):
    """A single summary observation keeps the expected data_index and
    key_index column values."""
    measured = MeasuredData(facade_snake_oil, ["WOPR_OP1_72"])
    measured.remove_inactive_observations()

    columns = measured.data.columns
    assert all(columns.get_level_values("data_index").values == [71])
    # Only one observation, we check the key_index is what we expect:
    key_index = columns.get_level_values("key_index").values[0]
    assert key_index == np.datetime64("2011-12-21")
Example #4
0
def _get_measured_data(
    facade, observation_keys, observation_index_list, alpha, std_cutoff
):
    """Load the given observations and apply the standard filtering
    pipeline: drop failed realizations and inactive observations, then
    filter by *alpha* (ensemble-mean band) and *std_cutoff*."""
    measured = MeasuredData(facade, observation_keys, observation_index_list)
    measured.remove_failed_realizations()
    measured.remove_inactive_observations()
    measured.filter_ensemble_mean_obs(alpha)
    measured.filter_ensemble_std(std_cutoff)
    return measured
Example #5
0
def test_gen_obs(monkeypatch, facade_snake_oil):
    """WPR_DIFF_1 gen-data observations expose identical values for the
    data_index and key_index column levels."""
    measured = MeasuredData(facade_snake_oil, ["WPR_DIFF_1"])
    measured.remove_inactive_observations()

    expected = [400, 800, 1200, 1800]
    columns = measured.data.columns
    assert all(columns.get_level_values("data_index").values == expected)
    assert all(columns.get_level_values("key_index").values == expected)
Example #6
0
def test_remove_failed_realizations(input_dataframe, expected_result,
                                    monkeypatch, facade, measured_data_setup):
    """Failed realizations are dropped from the measured data."""
    measured_data_setup(input_dataframe, monkeypatch)
    measured = MeasuredData(facade, ["test_key"])

    measured.remove_failed_realizations()

    expected_result.columns = _set_multiindex(expected_result)
    expected = pd.concat({"test_key": expected_result.astype(float)}, axis=1)
    assert measured.data.equals(expected)
Example #7
0
def test_get_simulated_data(input_dataframe, expected_result, monkeypatch,
                            facade, measured_data_setup):
    """get_simulated_data returns the simulated responses as floats,
    keyed under the observation key."""
    measured_data_setup(input_dataframe, monkeypatch)
    measured = MeasuredData(facade, ["test_key"])

    expected_result.columns = _set_multiindex(expected_result)
    expected = pd.concat({"test_key": expected_result.astype(float)}, axis=1)

    assert measured.get_simulated_data().equals(expected)
Example #8
0
def test_filter_ensamble_std(std_cutoff, expected_result, monkeypatch, facade,
                             measured_data_setup):
    """Observations whose ensemble std falls below *std_cutoff* are
    filtered out of the dataset."""
    expected_result.columns = _set_multiindex(expected_result)

    simulated = pd.DataFrame(data=[[1, 1.5], [1, 2.5]], index=[1, 2])
    observations = pd.DataFrame(data=[[1, 2], [0.1, 0.2]], index=["OBS", "STD"])
    observations.columns = _set_multiindex(observations)
    measured_data_setup(simulated, observations, monkeypatch)
    measured = MeasuredData(facade, ["obs_key"])

    measured.filter_ensemble_std(std_cutoff)
    assert measured.data.equals(pd.concat({"obs_key": expected_result}, axis=1))
Example #9
0
def _load_measured_record(enkf_main):
    """Build a fully filtered MeasuredData object covering every
    observation key known to *enkf_main*."""
    facade = LibresFacade(enkf_main)
    keys = [
        facade.get_observation_key(index)
        for index, _ in enumerate(facade.get_observations())
    ]
    record = MeasuredData(facade, keys)
    record.remove_failed_realizations()
    record.remove_inactive_observations()
    record.filter_ensemble_mean_obs(facade.get_alpha())
    record.filter_ensemble_std(facade.get_std_cutoff())
    return record
Example #10
0
def test_filter_ens_mean_obs(alpha, expected_result, monkeypatch, facade,
                             measured_data_setup):
    """Observations outside the *alpha* band around the ensemble mean are
    removed from the dataset."""
    expected_result.columns = _set_multiindex(expected_result)

    frame = pd.DataFrame(
        data=[[1, 2], [0.1, 0.2], [1.1, 1.6], [1, 2.5]],
        index=["OBS", "STD", 1, 2],
    )

    measured_data_setup(frame, monkeypatch)
    measured = MeasuredData(facade, ["test_key"])

    measured.filter_ensemble_mean_obs(alpha)
    assert measured.data.equals(pd.concat({"test_key": expected_result}, axis=1))
Example #11
0
def test_empty_dataset_from_remove_inactive_observations(
    input_header,
    measured_data,
    monkeypatch,
    facade,
    measured_data_setup,
):
    """Removing inactive observations from a dataset where everything is
    inactive raises a ValueError."""
    input_header.columns = _set_multiindex(input_header)
    measured_data_setup(measured_data, input_header, monkeypatch)
    measured = MeasuredData(facade, ["obs_key"])

    with pytest.raises(ValueError, match="This operation results in an empty dataset"):
        measured.remove_inactive_observations()
Example #12
0
def test_invalid_set_data(
    facade,
    monkeypatch,
    invalid_input,
    expected_error,
    valid_dataframe,
    measured_data_setup,
):
    """_set_data rejects malformed input with the expected error type."""
    measured_data_setup(valid_dataframe, monkeypatch)
    measured = MeasuredData(facade, ["test_key"], index_lists=[[1, 2]])

    with pytest.raises(expected_error):
        measured._set_data(invalid_input)
Example #13
0
def test_gen_obs_runtime(monkeypatch, copy_snake_oil):
    """Loading many general observations produces the expected data shape."""
    observations_path = pathlib.Path.cwd() / "observations" / "observations.txt"
    with observations_path.open(mode="a") as handle:
        handle.write(create_general_observation())

    ert = EnKFMain(ResConfig("snake_oil.ert"))
    facade = LibresFacade(ert)

    keys = [f"CUSTOM_DIFF_{restart}" for restart in range(1, 500)]
    measured = MeasuredData(facade, keys)

    measured.remove_inactive_observations()
    assert measured.data.shape == (27, 1995)
Example #14
0
def test_gen_obs_and_summary_index_range(monkeypatch, facade_snake_oil):
    """Index lists select a single observation per key, and the OBS/STD
    rows carry the expected values."""
    measured = MeasuredData(facade_snake_oil, ["WPR_DIFF_1", "FOPR"], [[800], [10]])
    measured.remove_inactive_observations()

    columns = measured.data.columns
    assert columns.get_level_values(0).to_list() == ["WPR_DIFF_1", "FOPR"]
    assert columns.get_level_values("data_index").to_list() == [800, 10]
    assert measured.data.loc["OBS"].values == pytest.approx(
        [0.1, 0.23281], abs=0.00001
    )
    assert measured.data.loc["STD"].values == pytest.approx([0.2, 0.1])
Example #15
0
def _extract_and_dump_observations(rdb_api, blob_api):
    """Load every observation known to the ERT facade and dump the
    OBS/STD rows through the given database/blob APIs."""
    facade = ERT.enkf_facade

    keys = [
        facade.get_observation_key(index)
        for index, _ in enumerate(facade.get_observations())
    ]

    measured = MeasuredData(facade, keys)
    measured.remove_inactive_observations()

    _dump_observations(
        rdb_api=rdb_api,
        blob_api=blob_api,
        observations=measured.data.loc[["OBS", "STD"]],
    )
Example #16
0
def _load_measured_record(enkf_main):
    """Return an unfiltered MeasuredData object covering every
    observation key known to *enkf_main*."""
    facade = LibresFacade(enkf_main)
    keys = [
        facade.get_observation_key(index)
        for index, _ in enumerate(facade.get_observations())
    ]
    return MeasuredData(facade, keys)
Example #17
0
def test_remove_failed_realizations(
    input_dataframe,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
    valid_obs_data,
):
    """Failed realizations are dropped from the measured data."""
    measured_data_setup(input_dataframe, valid_obs_data, monkeypatch)
    measured = MeasuredData(facade, ["obs_key"])

    measured.remove_failed_realizations()

    expected_result.columns = _set_multiindex(expected_result)
    assert measured.data.equals(pd.concat({"obs_key": expected_result}, axis=1))
Example #18
0
def test_remove_inactive_observations(
    input_header,
    measured_data,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
):
    """Inactive observations are removed from the dataset."""
    input_header.columns = _set_multiindex(input_header)
    measured_data_setup(measured_data, input_header, monkeypatch)
    measured = MeasuredData(facade, ["obs_key"])

    expected_result.columns = _set_multiindex(expected_result)
    expected = pd.concat({"obs_key": expected_result}, axis=1)

    measured.remove_inactive_observations()
    assert measured.data.equals(expected)
Example #19
0
def test_no_storage_obs_only(monkeypatch, obs_key):
    """With load_data=False, MeasuredData can be built without storage."""
    shutil.rmtree("storage")
    ert = EnKFMain(ResConfig("snake_oil.ert"))

    measured = MeasuredData(LibresFacade(ert), [obs_key], load_data=False)
    assert set(measured.data.columns.get_level_values(0)) == {obs_key}
Example #20
0
def test_gen_obs_and_summary(monkeypatch, facade_snake_oil):
    """Gen-data and summary observations load together with the expected
    key and data_index column levels."""
    measured = MeasuredData(facade_snake_oil, ["WPR_DIFF_1", "WOPR_OP1_9"])
    measured.remove_inactive_observations()

    columns = measured.data.columns
    assert columns.get_level_values(0).to_list() == [
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WOPR_OP1_9",
    ]
    assert columns.get_level_values("data_index").to_list() == [
        400,
        800,
        1200,
        1800,
        8,
    ]
Example #21
0
def test_all_measured_snapshot(snapshot, facade_snake_oil):
    """
    While there is no guarantee that this snapshot is 100% correct, it does
    capture the current state of loading from storage for the snake_oil case.
    """
    keys = facade_snake_oil.get_matching_wildcards()("*").strings
    measured = MeasuredData(facade_snake_oil, keys)
    snapshot.assert_match(
        measured.data.to_csv(), "snake_oil_measured_output.csv"
    )
Example #22
0
def test_no_storage(monkeypatch, obs_key, expected_msg):
    """Without a storage directory, loading observations raises
    loader.ResponseError with a descriptive message."""
    shutil.rmtree("storage")
    ert = EnKFMain(ResConfig("snake_oil.ert"))

    facade = LibresFacade(ert)
    with pytest.raises(loader.ResponseError, match=expected_msg):
        MeasuredData(facade, [obs_key])
Example #23
0
def test_block_obs(monkeypatch, tmpdir):
    """
    This test causes util_abort on some runs, so it will not be run by default
    as it is too flaky. I have chosen to leave it here as it could be useful when
    debugging. To run the test, run an ensemble_experiment on the snake_oil_field
    case to create a storage with BLOCK_OBS.
    """
    with tmpdir.as_cwd():
        test_data_dir = pathlib.Path(test_data_root) / "snake_oil_field"
        # Guard clause: the test only makes sense with a pre-built storage.
        if not (test_data_dir / "storage").exists():
            pytest.skip()

        shutil.copytree(test_data_dir, "test_data")
        os.chdir("test_data")

        block_obs = """
            \nBLOCK_OBSERVATION RFT_2006
            {
               FIELD = PRESSURE;
               DATE  = 10/01/2010;
               SOURCE = SUMMARY;

               OBS P1 { I = 5;  J = 5;  K = 5;   VALUE = 100;  ERROR = 5; };
               OBS P2 { I = 1;  J = 3;  K = 8;   VALUE = 50;  ERROR = 2; };
            };
            """
        obs_file = pathlib.Path.cwd() / "observations" / "observations.txt"
        with obs_file.open(mode="a") as handle:
            handle.write(block_obs)

        ert = EnKFMain(ResConfig("snake_oil.ert"))
        facade = LibresFacade(ert)

        measured = MeasuredData(facade, ["RFT_2006"])
        measured.remove_inactive_observations()
        columns = measured.data.columns
        assert all(columns.get_level_values("data_index").values == [0, 1])
        assert all(columns.get_level_values("key_index").values == [0, 1])
Example #24
0
def _observation_scaling(facade, config):
    """
    Collects data, performs scaling and applies scaling, assumes validated input.
    """
    keys = [event.key for event in config.CALCULATE_KEYS.keys]
    index_lists = [event.index for event in config.CALCULATE_KEYS.keys]
    measured = MeasuredData(facade, keys, index_lists)
    measured.remove_failed_realizations()
    measured.remove_inactive_observations()
    measured.filter_ensemble_mean_obs(config.CALCULATE_KEYS.alpha)
    measured.filter_ensemble_std(config.CALCULATE_KEYS.std_cutoff)

    # Normalize by std in place before deriving the scaling factor.
    matrix = DataMatrix(measured.data)
    matrix.std_normalization(inplace=True)
    scale_factor = matrix.get_scaling_factor(config.CALCULATE_KEYS)

    update_data = _create_active_lists(
        facade.get_observations(), config.UPDATE_KEYS.keys
    )
    _update_scaling(facade.get_observations(), scale_factor, update_data)
Example #25
0
def test_summary_obs_runtime(monkeypatch, copy_snake_oil):
    """
    Mostly a regression test: reading SUMMARY_OBS used to be very slow with
    SUMMARY_OBSERVATION (as opposed to HISTORY_OBSERVATION) when multiple
    observations pointed at the same response. We load the same observations
    both as individual points and in one go, and assert on the *ratio* of the
    runtimes to keep the test from being flaky: the bound is 10x, although the
    real difference should be around 2x.
    """

    observations_path = pathlib.Path.cwd() / "observations" / "observations.txt"
    with observations_path.open(mode="a") as handle:
        handle.write(create_summary_observation())

    ert = EnKFMain(ResConfig("snake_oil.ert"))
    facade = LibresFacade(ert)

    tic = time.time()
    foprh = MeasuredData(facade, [f"FOPR_{restart}" for restart in range(1, 201)])
    summary_obs_time = time.time() - tic

    tic = time.time()
    fopr = MeasuredData(facade, ["FOPR"])
    history_obs_time = time.time() - tic

    fopr_indices = fopr.data.columns.get_level_values("data_index")
    foprh_indices = foprh.data.columns.get_level_values("data_index")
    assert fopr_indices.values.tolist() == foprh_indices.values.tolist()

    matches = foprh.get_simulated_data().values == fopr.get_simulated_data().values
    assert np.logical_and.reduce(matches).all()
    assert summary_obs_time < 10 * history_obs_time
Example #26
0
def test_gen_obs_runtime(monkeypatch, copy_snake_oil, snapshot):
    """Snapshot test for loading a large number of general observations."""
    observations_path = pathlib.Path.cwd() / "observations" / "observations.txt"
    with observations_path.open(mode="a") as handle:
        handle.write(create_general_observation())

    ert = EnKFMain(ResConfig("snake_oil.ert"))
    facade = LibresFacade(ert)

    measured = MeasuredData(
        facade, [f"CUSTOM_DIFF_{restart}" for restart in range(500)]
    )

    snapshot.assert_match(measured.data.to_csv(), "snake_oil_gendata_output.csv")
Example #27
0
def _create_observation_transformation(ert, db_observations) -> List[dict]:
    """Build one transformation dict per observation (or per shared summary
    data key), carrying x_axis, scale, active mask and observation_id.

    Summary observations that point at the same data key are merged into a
    single entry; all scale factors are currently hard-coded to 1.
    Observation ids are looked up by name in *db_observations*.
    """
    observation_vectors = ert.get_observations()
    summary_obs_keys = observation_vectors.getTypedKeylist(
        EnkfObservationImplementationType.SUMMARY_OBS
    )
    active_obs = _extract_active_observations(ert)
    transformations: Dict = dict()
    keys = [ert.get_observation_key(i) for i, _ in enumerate(observation_vectors)]
    # load_data=False: only observation metadata (OBS/STD rows) is needed here.
    data = MeasuredData(ert, keys, load_data=False)
    observations = data.data.loc[["OBS", "STD"]]

    for obs_key, active_mask in active_obs.items():
        obs_data = _get_obs_data(obs_key, observations[obs_key])
        if obs_key in summary_obs_keys:
            # Summary observations sharing a data key are merged into one
            # transformation entry, accumulating x_axis/active/scale.
            obs_vec = observation_vectors[obs_key]
            data_key = obs_vec.getDataKey()
            if data_key in transformations:
                transformations[data_key]["x_axis"] += obs_data["x_axis"]
                transformations[data_key]["active"] += active_mask
                transformations[data_key]["scale"] += [1 for _ in active_mask]
            else:
                transformations[data_key] = dict(
                    name=data_key,
                    x_axis=obs_data["x_axis"],
                    scale=[1 for _ in active_mask],
                    active=active_mask,
                )
        else:
            # Scale is now mocked to 1 for now
            transformations[obs_key] = dict(
                name=obs_key,
                x_axis=obs_data["x_axis"],
                scale=[1 for _ in active_mask],
                active=active_mask,
            )
    observation_ids = {obs["name"]: obs["id"] for obs in db_observations}
    # Sorting by x_axis matches the transformation with the observation, mostly needed for grouped summary obs
    for key, obs in transformations.items():
        # Sort the three parallel lists together, keyed by x_axis.
        x_axis, active, scale = (
            list(t)
            for t in zip(*sorted(zip(obs["x_axis"], obs["active"], obs["scale"])))
        )
        x_axis = _prepare_x_axis(x_axis)
        transformations[key]["x_axis"] = x_axis
        transformations[key]["active"] = active
        transformations[key]["scale"] = scale
        transformations[key]["observation_id"] = observation_ids[key]

    return [transformation for _, transformation in transformations.items()]
Example #28
0
def _spearman_correlation(facade, obs_keys, threshold, dry_run):
    """
    Collects data, performs scaling and applies scaling, assumes validated input.
    """
    measured = MeasuredData(facade, obs_keys)
    measured.remove_failed_realizations()
    measured.remove_inactive_observations()
    measured.filter_ensemble_std(1.0e-6)

    simulated = measured.get_simulated_data()
    correlation_matrix = _calculate_correlation_matrix(simulated)
    clusters = _cluster_analysis(correlation_matrix, threshold)

    columns = correlation_matrix.columns

    # Here the clusters are joined with the key and data index
    # to group the observations; the column level values are the column
    # headers, where level 0 is the observation key and data_index
    # is a range.
    keyed_clusters = list(
        zip(
            clusters,
            columns.get_level_values(0),
            columns.get_level_values("data_index"),
        )
    )

    clustered_data = _cluster_data(keyed_clusters)
    job_configs = _config_creation(clustered_data)
    _output_clusters(clustered_data)

    if not dry_run:
        _run_scaling(facade, job_configs)
Example #29
0
def test_get_data(obs_type, monkeypatch, facade, valid_dataframe, measured_data_setup):
    """The loader factory is called exactly once with the observation type,
    and the loader itself with the expected arguments; the internal data
    frame matches the mocked input."""
    facade.get_impl_type_name_for_obs_key.return_value = obs_type

    factory = measured_data_setup(valid_dataframe, monkeypatch)
    measured = MeasuredData(facade, ["test_key"], index_lists=[[1, 2]])

    factory.assert_called_once_with(obs_type)
    mocked_loader = factory()
    mocked_loader.assert_called_once_with(facade, "test_key", "test_case", True)

    frame = pd.DataFrame(
        data=[[2.0, 3.0], [5.0, 6.0]], index=["OBS", "STD"], columns=[1, 2]
    )
    frame.columns = _set_multiindex(frame)
    expected = pd.concat({"test_key": frame}, axis=1)

    assert measured._data.equals(expected)
    def run(self, job_config):
        """Load the scaling configuration from *job_config* and run one
        ScalingJob per configured group."""
        facade = LibresFacade(self.ert())
        user_config = _insert_default_group(load_yaml(job_config))

        obs = facade.get_observations()
        obs_keys = [facade.get_observation_key(nr) for nr, _ in enumerate(obs)]
        obs_with_data = keys_with_data(
            obs,
            obs_keys,
            facade.get_ensemble_size(),
            facade.get_current_fs(),
        )

        for config in user_config:
            job = ScalingJob(obs_keys, obs, obs_with_data, config)
            job.scale(
                MeasuredData(facade, job.get_calc_keys(), job.get_index_lists())
            )