Example #1
import pathlib
import time

import numpy as np

# Import paths below assume the libres-era ERT layout; adjust to your version.
from ert_data.measured import MeasuredData
from ert_shared.libres_facade import LibresFacade
from res.enkf import EnKFMain, ResConfig

def test_summary_obs_runtime(monkeypatch, copy_snake_oil):
    """
    This is mostly a regression test, as reading SUMMARY_OBS was very slow when using
    SUMMARY_OBSERVATION and not HISTORY_OBSERVATION where multiple observations
    were pointing to the same response. To simulate that we load the same observations
    though individual points, and also in one go. To avoid this test being flaky the
    we assert on the difference in runtime. The difference in runtime we assert on is
    set to 10x though it should be around 2x
    """

    obs_file = pathlib.Path.cwd() / "observations" / "observations.txt"
    with obs_file.open(mode="a") as fout:
        fout.write(create_summary_observation())

    res_config = ResConfig("snake_oil.ert")
    ert = EnKFMain(res_config)

    facade = LibresFacade(ert)

    start_time = time.time()
    foprh = MeasuredData(facade, [f"FOPR_{restart}" for restart in range(1, 201)])
    summary_obs_time = time.time() - start_time

    start_time = time.time()
    fopr = MeasuredData(facade, ["FOPR"])
    history_obs_time = time.time() - start_time

    assert (fopr.data.columns.get_level_values("data_index").values.tolist() ==
            foprh.data.columns.get_level_values("data_index").values.tolist())

    result = foprh.get_simulated_data().values == fopr.get_simulated_data().values
    assert np.logical_and.reduce(result).all()
    assert summary_obs_time < 10 * history_obs_time
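
The test appends the output of create_summary_observation(), a helper defined
elsewhere in the test module. A minimal sketch of what such a helper could look
like, assuming one SUMMARY_OBSERVATION block per restart, all keyed to the same
FOPR response (the VALUE, ERROR, and RESTART entries here are illustrative
assumptions):

def create_summary_observation():
    # Emit 200 individual SUMMARY_OBSERVATION blocks that all point at the
    # same FOPR summary key, mirroring the slow path the test exercises.
    observations = ""
    for restart in range(1, 201):
        observations += f"""
SUMMARY_OBSERVATION FOPR_{restart}
{{
    VALUE   = 0.1;
    ERROR   = 0.05;
    RESTART = {restart};
    KEY     = FOPR;
}};
"""
    return observations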
Example #2
import pandas as pd

from ert_data.measured import MeasuredData

# input_dataframe, expected_result, facade, and measured_data_setup are
# pytest fixtures; _set_multiindex is a helper in the same module.
def test_get_simulated_data(input_dataframe, expected_result, monkeypatch,
                            facade, measured_data_setup):

    measured_data_setup(input_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"])

    expected_result.columns = _set_multiindex(expected_result)

    result = md.get_simulated_data()
    assert result.equals(
        pd.concat({"test_key": expected_result.astype(float)}, axis=1))
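
The test relies on a _set_multiindex helper defined in the same module. One
plausible sketch, assuming it builds the two-level ("key_index", "data_index")
column index that MeasuredData frames carry (the level contents are an
assumption):

def _set_multiindex(df):
    # Build a two-level column index in the MeasuredData layout; both
    # levels simply enumerate the column positions here.
    positions = list(range(len(df.columns)))
    return pd.MultiIndex.from_arrays(
        [positions, positions], names=["key_index", "data_index"]
    )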
Example #3
from ert_data.measured import MeasuredData

def _spearman_correlation(facade, obs_keys, threshold, dry_run):
    """
    Collects data, performs scaling and applies scaling, assumes validated input.
    """
    measured_data = MeasuredData(facade, obs_keys)
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_std(1.0e-6)

    simulated_data = measured_data.get_simulated_data()

    correlation_matrix = _calculate_correlation_matrix(simulated_data)

    clusters = _cluster_analysis(correlation_matrix, threshold)

    columns = correlation_matrix.columns

    # The cluster labels are joined with the observation key and data index
    # to group the observations. The column level values come from the
    # column headers: level 0 is the observation key and "data_index" is a
    # range.
    data = list(
        zip(
            clusters,
            columns.get_level_values(0),
            columns.get_level_values("data_index"),
        )
    )

    clustered_data = _cluster_data(data)

    job_configs = _config_creation(clustered_data)

    _output_clusters(clustered_data)

    if not dry_run:
        _run_scaling(facade, job_configs)
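
The _-prefixed helpers above live in the same module. Two hedged sketches of
what they could look like: _cluster_analysis as hierarchical clustering cut at
the given threshold (the linkage method and criterion are assumptions), and
_cluster_data grouping the flat (cluster, key, data_index) tuples into
per-cluster mappings (the exact return shape is an assumption):

from collections import defaultdict

from scipy.cluster import hierarchy


def _cluster_analysis(correlation_matrix, threshold):
    # Hierarchical clustering on the correlation matrix, cut at the
    # user-supplied threshold.
    linkage = hierarchy.linkage(correlation_matrix, method="average")
    return hierarchy.fcluster(linkage, threshold, criterion="inconsistent")


def _cluster_data(data):
    # Group (cluster, observation key, data index) tuples so each cluster
    # maps observation keys to the data indices assigned to it.
    clusters = defaultdict(lambda: defaultdict(list))
    for cluster, key, data_index in data:
        clusters[cluster][key].append(data_index)
    return {nr: dict(group) for nr, group in clusters.items()}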