Esempio n. 1
0
def test_misfit_preprocessor_configuration_errors():
    """Unknown configuration keys must surface as a ``ValidationError``.

    The configuration contains one unknown key at the root level and one
    below ``clustering.spearman_correlation.fcluster``; the error message is
    expected to list both of them.
    """
    obs, sim = generate_measurements(1)
    measured_data = MockedMeasuredData(obs, sim)

    fcluster_options = {"threshold": 1.0}
    invalid_config = {
        "unknown_key": [],
        "clustering": {
            "method": "spearman_correlation",
            "spearman_correlation": {"fcluster": fcluster_options},
        },
    }
    reporter = Mock()

    with pytest.raises(misfit_preprocessor.ValidationError) as exc_info:
        misfit_preprocessor.run(invalid_config, measured_data, reporter)

    # The message is compared verbatim, including the trailing newline.
    expected_message = (
        "Invalid configuration of misfit preprocessor\n"
        "  - Unknown key: unknown_key (root level)\n"
        "  - Unknown key: threshold (clustering.spearman_correlation.fcluster)\n"
    )
    assert str(exc_info.value) == expected_message
Esempio n. 2
0
    def run(self, *args):
        """Run the misfit preprocessor and hand its result to the COS job.

        The configuration record is fetched from ``args``, assembled together
        with the observations and its snapshot is passed to
        ``misfit_preprocessor.run``. Every resulting scaling configuration
        gets its ``CALCULATE_KEYS`` updated with the scaling parameters
        before ``CorrelatedObservationsScalingJob`` is executed.
        """
        ert_facade = LibresFacade(self.ert())
        raw_config = _fetch_config_record(args)
        obs = _get_observations(ert_facade)
        snapshot = assemble_config(raw_config, obs).snapshot

        measured = _load_measured_record(ert_facade, snapshot.observations)
        cos_configs = misfit_preprocessor.run(
            config=snapshot,
            measured_data=measured,
            reporter=self.reporter,
        )

        # Running COS from here is a stopgap: it should move into
        # misfit_preprocessor.run once COS no longer depends on self.ert
        # to run.
        extra_keys = _fetch_scaling_parameters(raw_config, obs)
        for cos_config in cos_configs:
            cos_config["CALCULATE_KEYS"].update(extra_keys)

        try:
            CorrelatedObservationsScalingJob(self.ert()).run(cos_configs)
        except EmptyDatasetException:
            # An empty dataset is silently ignored here.
            pass
Esempio n. 3
0
def test_misfit_preprocessor_n_polynomials_w_correlation(num_polynomials):
    """Run the preprocessor on polynomials where two of them are correlated.

    ``num_polynomials`` polynomials with three states each are generated,
    after which the ``poly_0`` data is overwritten with a scaled copy of the
    ``poly_1`` data before spearman-correlation clustering is run.
    """
    points_per_poly = 3
    poly_states = [
        range(1, points_per_poly + 1) for _ in range(num_polynomials)
    ]

    obs, sim = generate_measurements(
        num_polynomials,
        poly_states=poly_states,
        ensemble_size=10000,
    )
    measured_data = MockedMeasuredData(obs, sim)

    # Introduce a correlation: poly_0 becomes exactly twice poly_1.
    doubled_poly_1 = measured_data.data["poly_1"] * 2.0
    measured_data.data["poly_0"] = doubled_poly_1

    clustering = {"method": "spearman_correlation"}
    scaling = {"threshold": 0.99}
    config = {"clustering": clustering, "scaling": scaling}

    reporter = Mock()
    configs = misfit_preprocessor.run(config, measured_data, reporter)

    # NOTE(review): the assertion expects one config more than the number of
    # polynomials; confirm that this is the intended outcome of the added
    # correlation.
    assert len(configs) - 1 == num_polynomials, configs
Esempio n. 4
0
def test_misfit_preprocessor_state_uneven_size(state_size):
    """Cluster polynomials whose number of states differs.

    ``state_size`` holds one size per polynomial. The fcluster step is
    configured with the ``maxclust`` criterion and ``t`` set to one more than
    the number of polynomials; the test expects one config per polynomial
    and checks the configs with ``assert_homogen_clusters``.
    """
    poly_count = len(state_size)
    poly_states = [range(1, n_states + 1) for n_states in state_size]

    obs, sim = generate_measurements(
        poly_count,
        poly_states=poly_states,
        ensemble_size=30000,
    )
    measured_data = MockedMeasuredData(obs, sim)

    fcluster_options = {"t": poly_count + 1, "criterion": "maxclust"}
    config = {
        "clustering": {
            "method": "spearman_correlation",
            "spearman_correlation": {"fcluster": fcluster_options},
        }
    }

    reporter = Mock()
    configs = misfit_preprocessor.run(config, measured_data, reporter)

    assert len(configs) == poly_count, configs
    assert_homogen_clusters(configs)
Esempio n. 5
0
def test_misfit_preprocessor_state_size(state_size, method, linkage):
    """Cluster five polynomials for a given clustering and linkage method.

    Two combinations involving the ``[5, 5, 5, 5, 100]`` state sizes are
    skipped, with the reason given in the skip message. Otherwise the test
    expects homogeneous clusters and exactly one config per polynomial.
    """
    # pytest.skip raises, so the two guards below are mutually exclusive in
    # the same order as a nested if/elif would be.
    is_skewed_case = state_size == [5, 5, 5, 5, 100]
    if is_skewed_case and linkage == "average":
        pytest.skip("Produces wrong number of clusters")
    if is_skewed_case and method == "auto_scale":
        pytest.skip("Produces not homogeneous clusters due to PCA analysis")

    poly_count = 5
    poly_states = [range(1, n_states + 1) for n_states in state_size]

    obs, sim = generate_measurements(
        poly_count,
        poly_states=poly_states,
        ensemble_size=30000,
    )
    measured_data = MockedMeasuredData(obs, sim)

    # The options for the selected clustering method are stored under a key
    # named after the method itself.
    method_options = {"linkage": {"method": linkage}}
    config = {
        "clustering": {"method": method, method: method_options},
        "scaling": {"threshold": 0.99},
    }

    reporter = Mock()
    configs = misfit_preprocessor.run(config, measured_data, reporter)

    assert_homogen_clusters(configs)
    assert len(configs) == poly_count, configs
def test_misfit_preprocessor_n_polynomials(num_polynomials, method):
    """
    Build a data set of uncorrelated polynomials and verify that the
    preprocessor yields as many clusters as there are input polynomials.
    """
    points_per_poly = 3
    poly_states = [
        range(1, points_per_poly + 1) for _ in range(num_polynomials)
    ]

    obs, sim = generate_measurements(
        num_polynomials,
        poly_states=poly_states,
        ensemble_size=10000,
    )
    measured_data = MockedMeasuredData(obs, sim)

    # A PCA threshold of 0.99 means a high degree of correlation is required
    # to have an impact. This setting only matters for "auto_scale".
    user_config = {
        "clustering": {"method": method},
        "scaling": {"threshold": 0.99},
    }
    obs_keys = measured_data.data.columns.get_level_values(0)
    assembled = assemble_config(user_config, obs_keys)

    reporter = Mock()
    configs = misfit_preprocessor.run(
        assembled.snapshot, measured_data, reporter
    )

    assert_homogen_clusters(configs)
    assert len(configs) == num_polynomials, configs
Esempio n. 7
0
    def run(self, *args):
        """Run the misfit preprocessor and hand its result to the COS job.

        The configuration record is fetched from ``args`` and passed, with
        the measured record and the reporter, to ``misfit_preprocessor.run``.
        Every resulting scaling configuration gets its ``CALCULATE_KEYS``
        updated with the scaling parameters before
        ``CorrelatedObservationsScalingJob`` is executed.
        """
        raw_config = _fetch_config_record(args)
        measured = _load_measured_record(self.ert())
        cos_configs = misfit_preprocessor.run(
            misfit_preprocessor_config=raw_config,
            measured_data=measured,
            reporter=self.reporter,
        )

        # Running COS from here is a stopgap: it should move into
        # misfit_preprocessor.run once COS no longer depends on self.ert
        # to run.
        extra_keys = _fetch_scaling_parameters(raw_config, measured)
        for cos_config in cos_configs:
            cos_config["CALCULATE_KEYS"].update(extra_keys)

        try:
            CorrelatedObservationsScalingJob(self.ert()).run(cos_configs)
        except EmptyDatasetException:
            # An empty dataset is silently ignored here.
            pass