def test_remove_failed_realizations(
    input_dataframe, expected_result, monkeypatch, facade, measured_data_setup
):
    measured_data_setup(input_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"])

    md.remove_failed_realizations()

    expected_result.columns = _set_multiindex(expected_result)
    assert md.data.equals(
        pd.concat({"test_key": expected_result.astype(float)}, axis=1)
    )
def test_remove_failed_realizations(
    input_dataframe,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
    valid_obs_data,
):
    measured_data_setup(input_dataframe, valid_obs_data, monkeypatch)
    md = MeasuredData(facade, ["obs_key"])

    md.remove_failed_realizations()

    expected_result.columns = _set_multiindex(expected_result)
    expected_result = pd.concat({"obs_key": expected_result}, axis=1)
    assert md.data.equals(expected_result)
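# The fixtures used above (input_dataframe, expected_result, facade,
# measured_data_setup) are assumed to be defined elsewhere in the test module.
# A minimal sketch of what the two dataframe fixtures could look like; the
# shapes and values here are assumptions for illustration, not the project's
# actual fixtures:
import pandas as pd
import pytest


@pytest.fixture()
def input_dataframe():
    # Two realizations (rows) of three responses (columns); the second
    # realization is all-NaN, i.e. a failed run.
    return pd.DataFrame(
        [[1.0, 2.0, 3.0], [float("nan"), float("nan"), float("nan")]],
        index=[0, 1],
    )


@pytest.fixture()
def expected_result():
    # Only the surviving realization should remain after
    # remove_failed_realizations().
    return pd.DataFrame([[1.0, 2.0, 3.0]], index=[0])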
def _observation_scaling(facade, config):
    """
    Collects data, computes scaling factors and applies them to the
    observations. Assumes validated input.
    """
    calculate_keys = [event.key for event in config.CALCULATE_KEYS.keys]
    index_lists = [event.index for event in config.CALCULATE_KEYS.keys]
    measured_data = MeasuredData(facade, calculate_keys, index_lists)
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_mean_obs(config.CALCULATE_KEYS.alpha)
    measured_data.filter_ensemble_std(config.CALCULATE_KEYS.std_cutoff)

    matrix = DataMatrix(measured_data.data)
    matrix.std_normalization(inplace=True)

    scale_factor = matrix.get_scaling_factor(config.CALCULATE_KEYS)

    update_data = _create_active_lists(
        facade.get_observations(), config.UPDATE_KEYS.keys
    )
    _update_scaling(facade.get_observations(), scale_factor, update_data)
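# DataMatrix.std_normalization above rescales the collected data; a minimal
# sketch of the underlying idea (a hypothetical helper, not the DataMatrix
# implementation), assuming the frame carries "OBS" and "STD" index rows above
# the realization rows, as MeasuredData.data provides:
def _std_normalize_sketch(data):
    # Divide the observed values and every realization by the observation
    # error (the "STD" row), so all observations contribute on a comparable
    # scale before the scaling factor is computed.
    std = data.loc["STD"]
    return data.div(std, axis=1)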
def _spearman_correlation(facade, obs_keys, threshold, dry_run):
    """
    Collects data, clusters correlated observations and runs observation
    scaling on each cluster. Assumes validated input.
    """
    measured_data = MeasuredData(facade, obs_keys)
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    # Drop responses with (near) zero ensemble variation.
    measured_data.filter_ensemble_std(1.0e-6)

    simulated_data = measured_data.get_simulated_data()

    correlation_matrix = _calculate_correlation_matrix(simulated_data)
    clusters = _cluster_analysis(correlation_matrix, threshold)

    columns = correlation_matrix.columns

    # The cluster labels are joined with the observation key and data index
    # to group the observations. The column level values are the column
    # headers, where key_index is the observation key and data_index is a
    # range.
    data = list(
        zip(
            clusters,
            columns.get_level_values(0),
            columns.get_level_values("data_index"),
        )
    )

    clustered_data = _cluster_data(data)
    job_configs = _config_creation(clustered_data)

    _output_clusters(clustered_data)

    if not dry_run:
        _run_scaling(facade, job_configs)
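# _calculate_correlation_matrix and _cluster_analysis above are helpers of
# this module; a minimal sketch of one way to implement them with pandas and
# scipy (an illustration of the technique, not necessarily this project's
# exact implementation):
import scipy.cluster.hierarchy as hierarchy


def _calculate_correlation_matrix_sketch(simulated_data):
    # Spearman rank correlation between observation columns, computed
    # pairwise across realizations.
    return simulated_data.corr(method="spearman")


def _cluster_analysis_sketch(correlation_matrix, threshold):
    # Average-linkage hierarchical clustering on the correlation matrix;
    # fcluster cuts the tree at the given threshold and returns one cluster
    # label per observation column.
    linkage = hierarchy.linkage(correlation_matrix, "average")
    return hierarchy.fcluster(linkage, threshold)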