Example #1
import pandas as pd


# MeasuredData and the _set_multiindex helper are defined elsewhere in the
# test module; their imports are omitted from this listing.
def test_remove_failed_realizations(
    input_dataframe, expected_result, monkeypatch, facade, measured_data_setup
):
    # Seed the facade with the prepared input frame, then load it through
    # MeasuredData.
    measured_data_setup(input_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"])

    md.remove_failed_realizations()

    # Give the expected frame the same column MultiIndex that md.data carries.
    expected_result.columns = _set_multiindex(expected_result)

    assert md.data.equals(
        pd.concat({"test_key": expected_result.astype(float)}, axis=1)
    )
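
_set_multiindex is used by these tests but not shown in the listing. A
minimal sketch of what it could look like, assuming the two column levels
are named "key_index" and "data_index" and both reuse the frame's original
column labels (a reconstruction, not the project's actual helper):

import pandas as pd


def _set_multiindex(df):
    # Hypothetical reconstruction: both index levels reuse the existing
    # column labels; the level names match the layout MeasuredData produces.
    return pd.MultiIndex.from_arrays(
        [df.columns, df.columns], names=["key_index", "data_index"]
    )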
Example #2
import pandas as pd


def test_remove_failed_realizations(
    input_dataframe,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
    valid_obs_data,
):
    # Seed the facade with both the input frame and the observation data.
    measured_data_setup(input_dataframe, valid_obs_data, monkeypatch)
    md = MeasuredData(facade, ["obs_key"])

    md.remove_failed_realizations()

    # Rebuild the expected frame with the same column MultiIndex and
    # top-level observation key that md.data carries.
    expected_result.columns = _set_multiindex(expected_result)
    expected_result = pd.concat({"obs_key": expected_result}, axis=1)
    assert md.data.equals(expected_result)
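
The measured_data_setup fixture is not part of either listing. One
plausible shape, sketched on the assumption that it is a pytest factory
fixture and that MeasuredData._get_data (a hypothetical name) is the
loader being monkeypatched; MeasuredData is imported from the project
under test, as in the examples above:

import pytest


@pytest.fixture
def measured_data_setup():
    # Factory fixture: each test hands over its prepared frames plus
    # monkeypatch, and the factory stubs out MeasuredData's data loading.
    def _setup(input_dataframe, valid_obs_data, monkeypatch):
        # Hypothetical patch target; the real tests would also wire
        # valid_obs_data into the facade's observations.
        monkeypatch.setattr(
            MeasuredData,
            "_get_data",
            lambda self, *args, **kwargs: input_dataframe.copy(),
        )

    return _setup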
Example #3
def _observation_scaling(facade, config):
    """
    Collects data, calculates scaling factors and applies them to the
    observations; assumes validated input.
    """
    calculate_keys = [event.key for event in config.CALCULATE_KEYS.keys]
    index_lists = [event.index for event in config.CALCULATE_KEYS.keys]
    measured_data = MeasuredData(facade, calculate_keys, index_lists)
    # Drop unusable data before the scaling factor is computed.
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_mean_obs(config.CALCULATE_KEYS.alpha)
    measured_data.filter_ensemble_std(config.CALCULATE_KEYS.std_cutoff)

    matrix = DataMatrix(measured_data.data)
    matrix.std_normalization(inplace=True)

    scale_factor = matrix.get_scaling_factor(config.CALCULATE_KEYS)

    update_data = _create_active_lists(
        facade.get_observations(), config.UPDATE_KEYS.keys
    )

    _update_scaling(facade.get_observations(), scale_factor, update_data)
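
DataMatrix.std_normalization is applied before the scaling factor is
computed. As a rough illustration only: assuming the frame keeps
MeasuredData's layout of "OBS" and "STD" index rows plus one row per
realization, std normalization could look like the sketch below (this is
not the real DataMatrix implementation):

import pandas as pd


def std_normalization_sketch(data: pd.DataFrame) -> pd.DataFrame:
    # Assumed layout: index rows "OBS" (observed values) and "STD"
    # (observation errors), plus one row per realization; each column is a
    # single observation point.
    std = data.loc["STD"]
    value_rows = [idx for idx in data.index if idx != "STD"]
    normalized = data.copy()
    # Divide every value row by the observation error so all observation
    # points enter the scaling-factor calculation on a comparable scale.
    normalized.loc[value_rows] = data.loc[value_rows] / std
    return normalized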
Example #4
def _spearman_correlation(facade, obs_keys, threshold, dry_run):
    """
    Collects data, computes Spearman correlations between observation
    responses, clusters them, and (unless dry_run) runs scaling per
    cluster; assumes validated input.
    """
    measured_data = MeasuredData(facade, obs_keys)
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_std(1.0e-6)

    simulated_data = measured_data.get_simulated_data()

    correlation_matrix = _calculate_correlation_matrix(simulated_data)

    clusters = _cluster_analysis(correlation_matrix, threshold)

    columns = correlation_matrix.columns

    # Join each cluster label with its observation key and data index so the
    # observations can be grouped per cluster. The values come from the
    # column headers: level 0 is the observation key and "data_index" is a
    # range.
    data = list(
        zip(
            clusters,
            columns.get_level_values(0),
            columns.get_level_values("data_index"),
        )
    )

    clustered_data = _cluster_data(data)

    job_configs = _config_creation(clustered_data)

    _output_clusters(clustered_data)

    if not dry_run:
        _run_scaling(facade, job_configs)
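
_cluster_data is referenced above but not shown. Given the
(cluster, key, index) triples built from the correlation matrix columns,
one plausible implementation (sketched here as _cluster_data_sketch)
groups them into a nested mapping:

from collections import defaultdict


def _cluster_data_sketch(data):
    # Group the (cluster, key, index) triples into
    # {cluster: {observation key: [data indices]}}, so each cluster can be
    # turned into a single scaling-job configuration.
    clusters = defaultdict(lambda: defaultdict(list))
    for cluster, key, index in data:
        clusters[cluster][key].append(int(index))
    return {c: dict(keys) for c, keys in clusters.items()}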