def _load_measured_record(facade, obs_keys):
    """Build a filtered MeasuredData record for *obs_keys*.

    Drops failed realizations and inactive observations, then applies the
    facade's ensemble-mean (alpha) and std-cutoff filters before returning.
    """
    record = MeasuredData(facade, obs_keys)
    record.remove_failed_realizations()
    record.remove_inactive_observations()
    record.filter_ensemble_mean_obs(facade.get_alpha())
    record.filter_ensemble_std(facade.get_std_cutoff())
    return record
def test_summary_obs(monkeypatch, facade_snake_oil):
    """Summary observation columns expose report step and observation date."""
    measured = MeasuredData(facade_snake_oil, ["WOPR_OP1_72"])
    measured.remove_inactive_observations()
    data_index = measured.data.columns.get_level_values("data_index")
    assert all(data_index.values == [71])
    # Only one observation, we check the key_index is what we expect:
    key_index = measured.data.columns.get_level_values("key_index")
    assert key_index.values[0] == np.datetime64("2011-12-21")
def test_history_obs(monkeypatch, facade_snake_oil):
    """History observations (FOPR) span one data_index per report step."""
    measured = MeasuredData(facade_snake_oil, ["FOPR"])
    measured.remove_inactive_observations()
    index_values = measured.data.columns.get_level_values("data_index").values
    assert all(index_values == list(range(199)))
def _get_measured_data(
    facade, observation_keys, observation_index_list, alpha, std_cutoff
):
    """Return MeasuredData for the given keys/indices, fully filtered.

    Applies the standard pipeline: drop failed realizations, drop inactive
    observations, then filter by ensemble mean (*alpha*) and *std_cutoff*.
    """
    record = MeasuredData(facade, observation_keys, observation_index_list)
    record.remove_failed_realizations()
    record.remove_inactive_observations()
    record.filter_ensemble_mean_obs(alpha)
    record.filter_ensemble_std(std_cutoff)
    return record
def test_gen_obs(monkeypatch, facade_snake_oil):
    """General observations carry identical data_index and key_index values."""
    measured = MeasuredData(facade_snake_oil, ["WPR_DIFF_1"])
    measured.remove_inactive_observations()
    expected = [400, 800, 1200, 1800]
    for level in ("data_index", "key_index"):
        assert all(measured.data.columns.get_level_values(level).values == expected)
def test_remove_inactive_observations(
    input_dataframe, expected_result, monkeypatch, facade, measured_data_setup
):
    """remove_inactive_observations leaves exactly the expected frame."""
    measured_data_setup(input_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"])
    expected_result.columns = _set_multiindex(expected_result)
    md.remove_inactive_observations()
    # The data is keyed under the observation key with float values.
    expected = pd.concat({"test_key": expected_result.astype(float)}, axis=1)
    assert md.data.equals(expected)
def _load_measured_record(enkf_main):
    """Load a filtered MeasuredData record covering every observation.

    Enumerates all observation keys via the facade, then applies the standard
    filter pipeline (failed realizations, inactive observations, alpha and
    std-cutoff filters).
    """
    facade = LibresFacade(enkf_main)
    obs_keys = [
        facade.get_observation_key(index)
        for index, _ in enumerate(facade.get_observations())
    ]
    record = MeasuredData(facade, obs_keys)
    record.remove_failed_realizations()
    record.remove_inactive_observations()
    record.filter_ensemble_mean_obs(facade.get_alpha())
    record.filter_ensemble_std(facade.get_std_cutoff())
    return record
def test_empty_dataset_from_remove_inactive_observations(
    input_header,
    measured_data,
    monkeypatch,
    facade,
    measured_data_setup,
):
    """Removing every observation must raise rather than yield an empty frame."""
    input_header.columns = _set_multiindex(input_header)
    measured_data_setup(measured_data, input_header, monkeypatch)
    md = MeasuredData(facade, ["obs_key"])
    with pytest.raises(ValueError, match="This operation results in an empty dataset"):
        md.remove_inactive_observations()
def test_gen_obs_runtime(monkeypatch, copy_snake_oil):
    """Loading ~500 general observations produces the expected data shape."""
    obs_path = pathlib.Path.cwd() / "observations" / "observations.txt"
    with obs_path.open(mode="a") as handle:
        handle.write(create_general_observation())
    # Build the ERT facade from the snake-oil config.
    ert = EnKFMain(ResConfig("snake_oil.ert"))
    facade = LibresFacade(ert)
    keys = [f"CUSTOM_DIFF_{restart}" for restart in range(1, 500)]
    measured = MeasuredData(facade, keys)
    measured.remove_inactive_observations()
    assert measured.data.shape == (27, 1995)
def test_gen_obs_and_summary_index_range(monkeypatch, facade_snake_oil):
    """Mixing gen and summary keys with index lists selects the right columns."""
    measured = MeasuredData(facade_snake_oil, ["WPR_DIFF_1", "FOPR"], [[800], [10]])
    measured.remove_inactive_observations()
    columns = measured.data.columns
    assert columns.get_level_values(0).to_list() == [
        "WPR_DIFF_1",
        "FOPR",
    ]
    assert columns.get_level_values("data_index").to_list() == [
        800,
        10,
    ]
    # Observation values and standard deviations for the two selected points.
    assert measured.data.loc["OBS"].values == pytest.approx([0.1, 0.23281], abs=0.00001)
    assert measured.data.loc["STD"].values == pytest.approx([0.2, 0.1])
def _extract_and_dump_observations(rdb_api, blob_api):
    """Collect all active observations from ERT and dump them via the APIs."""
    facade = ERT.enkf_facade
    observation_keys = [
        facade.get_observation_key(index)
        for index, _ in enumerate(facade.get_observations())
    ]
    measured = MeasuredData(facade, observation_keys)
    measured.remove_inactive_observations()
    # Only the observed values and their standard deviations are persisted.
    observations = measured.data.loc[["OBS", "STD"]]
    _dump_observations(rdb_api=rdb_api, blob_api=blob_api, observations=observations)
def test_remove_inactive_observations(
    input_header,
    measured_data,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
):
    """remove_inactive_observations produces the expected multi-indexed frame."""
    input_header.columns = _set_multiindex(input_header)
    measured_data_setup(measured_data, input_header, monkeypatch)
    md = MeasuredData(facade, ["obs_key"])
    expected_result.columns = _set_multiindex(expected_result)
    expected = pd.concat({"obs_key": expected_result}, axis=1)
    md.remove_inactive_observations()
    assert md.data.equals(expected)
def test_gen_obs_and_summary(monkeypatch, facade_snake_oil):
    """Gen-data and summary observations combine into one column index."""
    measured = MeasuredData(facade_snake_oil, ["WPR_DIFF_1", "WOPR_OP1_9"])
    measured.remove_inactive_observations()
    columns = measured.data.columns
    # Four WPR_DIFF_1 points followed by the single summary observation.
    assert columns.get_level_values(0).to_list() == ["WPR_DIFF_1"] * 4 + ["WOPR_OP1_9"]
    assert columns.get_level_values("data_index").to_list() == [
        400,
        800,
        1200,
        1800,
        8,
    ]
def test_block_obs(monkeypatch, tmpdir):
    """
    This test causes util_abort on some runs, so it will not be run by default
    as it is too flaky. I have chosen to leave it here as it could be useful
    when debugging. To run the test, run an ensemble_experiment on the
    snake_oil_field case to create a storage with BLOCK_OBS.
    """
    with tmpdir.as_cwd():
        test_data_dir = pathlib.Path(test_data_root) / "snake_oil_field"
        # Requires a pre-built storage with BLOCK_OBS; skip when it is missing.
        if not (test_data_dir / "storage").exists():
            pytest.skip()
        else:
            shutil.copytree(test_data_dir, "test_data")
            os.chdir("test_data")
            # Raw BLOCK_OBSERVATION config text appended to the observations
            # file below: two pressure observations (P1, P2) at given cells.
            block_obs = """ \nBLOCK_OBSERVATION RFT_2006 { FIELD = PRESSURE; DATE = 10/01/2010; SOURCE = SUMMARY; OBS P1 { I = 5; J = 5; K = 5; VALUE = 100; ERROR = 5; }; OBS P2 { I = 1; J = 3; K = 8; VALUE = 50; ERROR = 2; }; }; """
            obs_file = pathlib.Path.cwd() / "observations" / "observations.txt"
            with obs_file.open(mode="a") as fin:
                fin.write(block_obs)
            res_config = ResConfig("snake_oil.ert")
            ert = EnKFMain(res_config)
            facade = LibresFacade(ert)
            df = MeasuredData(facade, ["RFT_2006"])
            df.remove_inactive_observations()
            # Both block observation points survive as indices 0 and 1.
            assert all(
                df.data.columns.get_level_values("data_index").values == [0, 1])
            assert all(
                df.data.columns.get_level_values("key_index").values == [0, 1])
def _observation_scaling(facade, config):
    """
    Collects data, performs scaling and applies scaling, assumes validated
    input.
    """
    events = config.CALCULATE_KEYS.keys
    calculate_keys = [event.key for event in events]
    index_lists = [event.index for event in events]
    # Standard filter pipeline before computing the scaling factor.
    measured = MeasuredData(facade, calculate_keys, index_lists)
    measured.remove_failed_realizations()
    measured.remove_inactive_observations()
    measured.filter_ensemble_mean_obs(config.CALCULATE_KEYS.alpha)
    measured.filter_ensemble_std(config.CALCULATE_KEYS.std_cutoff)
    data_matrix = DataMatrix(measured.data)
    data_matrix.std_normalization(inplace=True)
    scale_factor = data_matrix.get_scaling_factor(config.CALCULATE_KEYS)
    # Apply the computed factor to the configured observation keys.
    obs = facade.get_observations()
    update_data = _create_active_lists(obs, config.UPDATE_KEYS.keys)
    _update_scaling(obs, scale_factor, update_data)
def _spearman_correlation(facade, obs_keys, threshold, dry_run):
    """
    Collects data, performs scaling and applies scaling, assumes validated
    input.
    """
    measured = MeasuredData(facade, obs_keys)
    measured.remove_failed_realizations()
    measured.remove_inactive_observations()
    measured.filter_ensemble_std(1.0e-6)
    simulated_data = measured.get_simulated_data()
    correlation_matrix = _calculate_correlation_matrix(simulated_data)
    clusters = _cluster_analysis(correlation_matrix, threshold)
    columns = correlation_matrix.columns
    # Pair each cluster id with the observation key (column level 0) and its
    # data_index (a range) so observations can be grouped per cluster.
    data = list(
        zip(
            clusters,
            columns.get_level_values(0),
            columns.get_level_values("data_index"),
        )
    )
    clustered_data = _cluster_data(data)
    job_configs = _config_creation(clustered_data)
    _output_clusters(clustered_data)
    # A dry run reports the clusters without launching the scaling jobs.
    if not dry_run:
        _run_scaling(facade, job_configs)