def test_join(self):
    """Join summary, GEN_KW, misfit and design-matrix data and verify the result.

    The four frames are inner-joined in sequence; selected values are then
    checked for the first and last report date, a lookup of realization 60
    is required to raise ``KeyError``, and the dtypes of the first and the
    last two columns are verified.
    """
    with ErtTestContext("python/enkf/export/export_join", self.config) as context:
        dumpDesignMatrix("DesignMatrix.txt")
        ert = context.getErt()

        summary_data = SummaryCollector.loadAllSummaryData(ert, "default_1")
        gen_kw_data = GenKwCollector.loadAllGenKwData(ert, "default_1")
        misfit = MisfitCollector.loadAllMisfitData(ert, "default_1")
        dm = DesignMatrixReader.loadDesignMatrix("DesignMatrix.txt")

        result = summary_data.join(gen_kw_data, how="inner")
        result = result.join(misfit, how="inner")
        result = result.join(dm, how="inner")

        first_date = "2010-01-10"
        last_date = "2015-06-23"

        # GEN_KW parameter values (same value on every date of a realization).
        self.assertFloatEqual(
            result["SNAKE_OIL_PARAM:OP1_OCTAVES"][0][first_date], 3.947766
        )
        self.assertFloatEqual(
            result["SNAKE_OIL_PARAM:OP1_OCTAVES"][24][first_date], 4.206698
        )
        self.assertFloatEqual(
            result["SNAKE_OIL_PARAM:OP1_OCTAVES"][24][last_date], 4.206698
        )

        # Design-matrix columns.
        self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][0][first_date], 0.08)
        self.assertEqual(result["EXTRA_INT_COLUMN"][0][first_date], 125)
        self.assertEqual(result["EXTRA_STRING_COLUMN"][0][first_date], "ON")

        self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][0][last_date], 0.08)
        self.assertEqual(result["EXTRA_INT_COLUMN"][0][last_date], 125)
        self.assertEqual(result["EXTRA_STRING_COLUMN"][0][last_date], "ON")

        self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][1][last_date], 0.07)
        self.assertEqual(result["EXTRA_INT_COLUMN"][1][last_date], 225)
        self.assertEqual(result["EXTRA_STRING_COLUMN"][1][last_date], "OFF")

        # Misfit columns.
        self.assertFloatEqual(result["MISFIT:FOPR"][0][last_date], 457.491003)
        self.assertFloatEqual(result["MISFIT:FOPR"][24][last_date], 1630.774198)

        self.assertFloatEqual(result["MISFIT:TOTAL"][0][first_date], 468.469969)
        self.assertFloatEqual(result["MISFIT:TOTAL"][0][last_date], 468.469969)
        self.assertFloatEqual(result["MISFIT:TOTAL"][24][last_date], 1714.662370)

        # Realization 60 must not be part of the joined frame.
        with self.assertRaises(KeyError):
            _ = result.loc[60]

        # ``dtypes`` is indexed by column label, so positions are addressed
        # explicitly through ``iloc``.  The removed ``numpy.object`` alias is
        # replaced by the builtin ``object`` it used to point to.
        column_count = len(result.columns)
        self.assertEqual(result.dtypes.iloc[0], numpy.float64)
        self.assertEqual(result.dtypes.iloc[column_count - 1], object)
        self.assertEqual(result.dtypes.iloc[column_count - 2], numpy.int64)
def test_misfit_collector(self):
    """Load the misfit data of case ``default_0`` and check reference values.

    Besides the numeric checks, realization 20 must be retrievable while
    realization 60 must raise ``KeyError``.
    """
    # (column, realization, expected misfit) -- checked in this order.
    expected_misfits = (
        ("MISFIT:FOPR", 0, 798.378619),
        ("MISFIT:FOPR", 24, 1332.219633),
        ("MISFIT:TOTAL", 0, 826.651491),
        ("MISFIT:TOTAL", 24, 1431.305646),
    )

    test_context = ErtTestContext(
        "python/enkf/export/misfit_collector", self.config
    )
    with test_context as context:
        misfit_frame = MisfitCollector.loadAllMisfitData(
            context.getErt(), "default_0"
        )

        for column, realization, expected in expected_misfits:
            self.assertFloatEqual(misfit_frame[column][realization], expected)

        # Looking up an existing realization must succeed ...
        _ = misfit_frame.loc[20]

        # ... while a missing one must raise.
        with self.assertRaises(KeyError):
            _ = misfit_frame.loc[60]
def test_misfit_collector(self):
    """Load the misfit data of case ``default_0`` and check reference values.

    Besides the numeric checks, realization 20 must be retrievable while
    realization 60 must raise ``KeyError``.
    """
    # (column, realization, expected misfit) -- checked in this order.
    expected_misfits = (
        ("MISFIT:FOPR", 0, 737.436374),
        ("MISFIT:FOPR", 24, 1258.644538),
        ("MISFIT:TOTAL", 0, 765.709246),
        ("MISFIT:TOTAL", 24, 1357.730551),
    )

    test_context = ErtTestContext(
        "python/enkf/export/misfit_collector", self.config
    )
    with test_context as context:
        misfit_frame = MisfitCollector.loadAllMisfitData(
            context.getErt(), "default_0"
        )

        for column, realization, expected in expected_misfits:
            self.assertFloatEqual(misfit_frame[column][realization], expected)

        # Looking up an existing realization must succeed ...
        _ = misfit_frame.loc[20]

        # ... while a missing one must raise.
        with self.assertRaises(KeyError):
            _ = misfit_frame.loc[60]
def test_misfit_collector(self):
    """Load the misfit data of case ``default_0`` and check reference values.

    Besides the numeric checks, realization 20 must be retrievable while
    realization 60 must raise ``KeyError``.
    """
    # (column, realization, expected misfit) -- checked in this order.
    expected_misfits = (
        ("MISFIT:FOPR", 0, 738.735586),
        ("MISFIT:FOPR", 24, 1260.086789),
        ("MISFIT:TOTAL", 0, 767.008457),
        ("MISFIT:TOTAL", 24, 1359.172803),
    )

    test_context = ErtTestContext(
        "python/enkf/export/misfit_collector", self.config
    )
    with test_context as context:
        misfit_frame = MisfitCollector.loadAllMisfitData(
            context.getErt(), "default_0"
        )

        for column, realization, expected in expected_misfits:
            self.assertFloatEqual(misfit_frame[column][realization], expected)

        # Looking up an existing realization must succeed ...
        _ = misfit_frame.loc[20]

        # ... while a missing one must raise.
        with self.assertRaises(KeyError):
            _ = misfit_frame.loc[60]
def create_update_data(ert):
    """Yield one ``js.MisfitCreate`` record per observation vector of *ert*.

    Each record carries the observation key, the key of the response the
    observation refers to, the entry of the active-observation mapping for
    that key (``None`` when the mapping is falsy) and the total chi^2 of the
    observation vector for every realization returned by
    ``MisfitCollector.createActiveList``.

    This is a generator: nothing is read from *ert* until it is iterated.
    """
    current_fs = ert.get_current_fs()
    active_realizations = MisfitCollector.createActiveList(
        _EnKFMain(ert), current_fs
    )
    active_by_key = _extract_active_observations(ert)

    for vector in ert.get_observations():
        observation_key = vector.getObservationKey()
        response_key = vector.getDataKey()

        if active_by_key:
            active = active_by_key[observation_key]
        else:
            active = None

        chi2_by_realization = {}
        for realization in active_realizations:
            chi2_by_realization[realization] = vector.getTotalChi2(
                current_fs, realization
            )

        yield js.MisfitCreate(
            observation_key=observation_key,
            response_definition_key=response_key,
            active=active,
            realizations=chi2_by_realization,
        )
def _extract_and_dump_update_data(ensemble_id, ensemble_name, rdb_api, blob_api):
    """Store observation/response links and per-realization misfits.

    For every observation vector of the current ERT facade this

    * looks up the response definition and the observation in ``rdb_api``,
    * stores the active-observation entry for the vector as a blob when
      active-observation data is available,
    * adds an observation/response-definition link (tagged with the id of
      the ensemble's parent, if it has one), and
    * adds one misfit row (total chi^2) per active realization.

    Args:
        ensemble_id: Id used to fetch the ensemble and response definitions.
        ensemble_name: Ensemble name used when fetching responses.
        rdb_api: API object used for all relational lookups and inserts.
        blob_api: API object used to store the active-observation blobs.
    """
    enkf_facade = ERT.enkf_facade
    current_fs = enkf_facade.get_current_fs()
    active_realizations = MisfitCollector.createActiveList(ERT.ert, current_fs)
    active_by_key = _extract_active_observations(enkf_facade)

    ensemble = rdb_api.get_ensemble_by_id(ensemble_id=ensemble_id)
    if ensemble.parent is None:
        update_id = None
    else:
        update_id = ensemble.parent.id

    # Whether a blob is written (and referenced) for each observation.
    store_active = active_by_key is not None

    for vector in enkf_facade.get_observations():
        obs_key = vector.getObservationKey()
        resp_key = vector.getDataKey()
        resp_definition = rdb_api._get_response_definition(resp_key, ensemble_id)

        if store_active:
            blob = blob_api.add_blob(active_by_key[obs_key])
            blob_api.flush()

        obs = rdb_api.get_observation(obs_key)
        link = rdb_api._add_observation_response_definition_link(
            observation_id=obs.id,
            response_definition_id=resp_definition.id,
            # ``blob`` only exists when ``store_active`` is true.
            active_ref=blob.id if store_active else None,
            update_id=update_id,
        )

        for realization in active_realizations:
            response = rdb_api.get_response(
                name=resp_key,
                realization_index=realization,
                ensemble_name=ensemble_name,
            )
            chi2 = vector.getTotalChi2(current_fs, realization)
            rdb_api._add_misfit(
                value=chi2, link_id=link.id, response_id=response.id
            )
def run(self, output_file, case_list=None, design_matrix_path=None, infer_iteration=True):
    """Export parameter, misfit and summary data of one or more cases to CSV.

    For every selected case the GEN_KW data is outer-joined with the custom
    keyword data, the design matrix (if given), the misfit data and the
    summary data; empty frames are skipped.  The per-case frames are stacked
    and written to *output_file* with the index levels ordered as
    ``Realization, Iteration, Date, Case``.

    Args:
        output_file: Destination handed to ``DataFrame.to_csv``.
        case_list: Comma separated case names, ``"*"`` for all cases from
            ``self.getAllCaseList()``, or ``None`` for the current case.
        design_matrix_path: Optional path to a design matrix file that is
            joined into every case.
        infer_iteration: When true the iteration number comes from
            ``self.inferIterationNumber(case)``; otherwise the position of
            the case in the list is used.

    Returns:
        A message stating how many rows and columns were exported.

    Raises:
        UserWarning: If the design matrix path does not exist or is not a
            file, or if a case does not exist or holds no data.
    """
    cases = []
    if case_list is not None:
        if case_list.strip() == "*":
            cases = self.getAllCaseList()
        else:
            cases = case_list.split(",")

    if case_list is None or len(cases) == 0:
        cases = [self.ert().getEnkfFsManager().getCurrentFileSystem().getCaseName()]

    if design_matrix_path is not None:
        if not os.path.exists(design_matrix_path):
            raise UserWarning("The design matrix file does not exists!")

        if not os.path.isfile(design_matrix_path):
            raise UserWarning("The design matrix is not a file!")

    # The design matrix does not depend on the case, so it is read at most
    # once -- lazily, at the point where the first case needs it, so that
    # case validation errors are still reported first.
    design_matrix_data = None

    # Per-case frames are collected and concatenated once at the end rather
    # than growing a frame (seeded with an empty DataFrame) inside the loop.
    case_frames = []

    for index, case in enumerate(cases):
        case_name = case.strip()
        fs_manager = self.ert().getEnkfFsManager()

        if not fs_manager.caseExists(case_name):
            raise UserWarning("The case '%s' does not exist!" % case_name)

        if not fs_manager.caseHasData(case_name):
            raise UserWarning("The case '%s' does not have any data!" % case_name)

        if infer_iteration:
            iteration_number = self.inferIterationNumber(case_name)
        else:
            iteration_number = index

        case_data = GenKwCollector.loadAllGenKwData(self.ert(), case_name)

        custom_kw_data = CustomKWCollector.loadAllCustomKWData(self.ert(), case_name)
        if not custom_kw_data.empty:
            case_data = case_data.join(custom_kw_data, how='outer')

        if design_matrix_path is not None:
            if design_matrix_data is None:
                design_matrix_data = DesignMatrixReader.loadDesignMatrix(design_matrix_path)
            if not design_matrix_data.empty:
                case_data = case_data.join(design_matrix_data, how='outer')

        misfit_data = MisfitCollector.loadAllMisfitData(self.ert(), case_name)
        if not misfit_data.empty:
            case_data = case_data.join(misfit_data, how='outer')

        summary_data = SummaryCollector.loadAllSummaryData(self.ert(), case_name)
        if not summary_data.empty:
            case_data = case_data.join(summary_data, how='outer')
        else:
            # Without summary data there is no "Date" level yet; add an
            # empty one so every case ends up with the same index levels.
            case_data["Date"] = None
            case_data.set_index(["Date"], append=True, inplace=True)

        case_data["Iteration"] = iteration_number
        case_data["Case"] = case_name
        case_data.set_index(["Case", "Iteration"], append=True, inplace=True)

        case_frames.append(case_data)

    # ``cases`` always holds at least one entry here, so there is at least
    # one frame to concatenate.
    data = pandas.concat(case_frames)
    data = data.reorder_levels(["Realization", "Iteration", "Date", "Case"])
    data.to_csv(output_file)

    export_info = "Exported %d rows and %d columns to %s." % (
        len(data.index),
        len(data.columns),
        output_file,
    )
    return export_info
def run(self, target_name="analysis_case", prior_name=None, group_by="data_key"):
    """Perform analysis of parameters change per obs group
    prior to posterior of ahm

    For every observation group produced by ``make_obs_groups`` a ministep
    update is run from the prior case into the target case.  After each
    update the posterior scalar parameters, the ratio of active
    observations, a misfit value and a Kolmogorov-Smirnov (KS) value per
    updated scalar parameter are collected.  When the ensemble config lists
    FIELD parameters, a PCA-based KS value and a mean-delta map are computed
    per field parameter as well.

    Results are handed to ``self.reporter.publish_csv``: one CSV per
    observation group, one ``delta_field<param>`` CSV per field parameter,
    and the ``ks``, ``active_obs_info``, ``misfit_obs_info`` and ``prior``
    tables.  The method has no return statement.

    Args:
        target_name: Name of the case the updates are written to; passed
            through ``check_names`` before use.
        prior_name: Name of the prior case; passed through ``check_names``
            together with the current case name.
        group_by: Grouping criterion forwarded to ``_group_observations``.
    """
    ert = self.ert()
    facade = LibresFacade(self.ert())

    # Collect the observation key of every observation known to the facade.
    obs_keys = [
        facade.get_observation_key(nr)
        for nr, _ in enumerate(facade.get_observations())
    ]
    key_map = _group_observations(facade, obs_keys, group_by)

    # check_names returns the (prior, target) names that are used from here on.
    prior_name, target_name = check_names(
        facade.get_current_case_name(),
        prior_name,
        target_name,
    )
    # Get the prior scalar parameter distributions
    prior_data = GenKwCollector.loadAllGenKwData(ert, prior_name)
    # NOTE(review): presumably raises when one of the frames is empty, using
    # the message at the same position -- confirm against raise_if_empty.
    raise_if_empty(
        dataframes=[
            prior_data,
            MisfitCollector.loadAllMisfitData(ert, prior_name)
        ],
        messages=[
            "Empty prior ensemble",
            "Empty parameters set for History Matching",
        ],
    )

    # create dataframe with observations vectors (1 by 1 obs and also all_obs)
    combinations = make_obs_groups(key_map)

    field_parameters = sorted(ert.ensembleConfig().getKeylistFromImplType(
        ErtImplType.FIELD))
    scalar_parameters = sorted(ert.ensembleConfig().getKeylistFromImplType(
        ErtImplType.GEN_KW))
    # Identify the set of parameters that were actually updated. For now only
    # scalar parameters are considered; field parameters should be included
    # in the future if access to field parameter updates becomes easier.
    dkeysf = get_updated_parameters(prior_data, scalar_parameters)
    # Set up the dataframes for the calculated data: the KS table (one row
    # per updated parameter), the active-observation info and the misfit
    # values (single row labelled "misfit").
    kolmogorov_smirnov_data, active_obs, misfitval = (
        pd.DataFrame(sorted(dkeysf), columns=["Parameters"]),
        pd.DataFrame(),
        pd.DataFrame(index=["misfit"]),
    )
    # Loop over the observation groups and calculate the KS matrix,
    # conditioning on one group at a time.
    # field_output maps group name -> field parameters of the target case
    # as read right after that group's update.
    field_output = {}
    for group_name, obs_group in combinations.items():
        print("Processing:", group_name)

        # Use localization to evaluate change of parameters for each observation
        with tempfile.TemporaryDirectory() as update_log_path:
            _run_ministep(
                ert,
                obs_group,
                field_parameters + scalar_parameters,
                prior_name,
                target_name,
                update_log_path,
            )
            # Get the active vs total observation info; the update log has to
            # be read while the temporary directory still exists.
            df_update_log = make_update_log_df(update_log_path)

        # Get the updated scalar parameter distributions
        self.reporter.publish_csv(
            group_name, GenKwCollector.loadAllGenKwData(ert, target_name))

        # Stored as text of the form "<active count> active/<log row count>".
        active_obs.at["ratio", group_name] = (
            str(count_active_observations(df_update_log)) + " active/" +
            str(len(df_update_log.index)))
        # Get misfit values (computed from the misfit data of the prior case)
        misfitval[group_name] = [
            calc_observationsgroup_misfit(
                group_name,
                df_update_log,
                MisfitCollector.loadAllMisfitData(ert, prior_name),
            )
        ]
        # Calculate Ks matrix for scalar parameters: the result of
        # calc_kolmogorov_smirnov is used to map each parameter name to its
        # value for this group.
        kolmogorov_smirnov_data[group_name] = kolmogorov_smirnov_data[
            "Parameters"].map(
                calc_kolmogorov_smirnov(
                    dkeysf,
                    prior_data,
                    GenKwCollector.loadAllGenKwData(ert, target_name),
                ))
        # Keep the posterior field parameters of this group for the field
        # analysis below (the target case is reused by the next group).
        field_output[group_name] = _get_field_params(
            ert, facade.get_ensemble_size(), field_parameters, target_name)
    kolmogorov_smirnov_data.set_index("Parameters", inplace=True)

    # Calculate Ks matrix for Fields parameters
    if field_parameters:
        # Get grid characteristics to be able to plot field avg maps
        grid_xyzcenter = load_grid_to_dataframe(
            ert.eclConfig().get_gridfile())
        all_input_prior = _get_field_params(ert, facade.get_ensemble_size(),
                                            field_parameters, prior_name)

        for fieldparam in field_parameters:
            # Standardise the prior field values and fit a PCA on them; the
            # same scaler and PCA are applied to the posterior fields below.
            # NOTE(review): assuming scikit-learn's PCA, 0.98 keeps the
            # components explaining 98 % of the variance -- confirm.
            scaler = StandardScaler()
            scaler.fit(all_input_prior[fieldparam])
            pca = PCA(0.98).fit(
                pd.DataFrame(scaler.transform(
                    all_input_prior[fieldparam])))
            pc_fieldprior_df = pd.DataFrame(data=pca.transform(
                scaler.transform(all_input_prior[fieldparam])))
            # One row per principal component of the prior field.
            all_kolmogorov_smirnov = pd.DataFrame(
                pc_fieldprior_df.columns.tolist(),
                columns=["PCFieldParameters"])
            # Get the posterior Field parameters
            # Rows of the grid table with KZ == 1 form the base of the map.
            # NOTE(review): presumably the top grid layer -- confirm against
            # load_grid_to_dataframe.
            map_calc_properties = (grid_xyzcenter[grid_xyzcenter["KZ"] ==
                                                  1].copy().reset_index())
            for group_name in combinations.keys():
                # Mean prior-to-posterior delta of the field for this group.
                map_calc_properties[
                    "Mean_D_" + group_name] = calc_mean_delta_grid(
                        field_output[group_name][fieldparam],
                        all_input_prior[fieldparam],
                        grid_xyzcenter,
                    )

                # Project the posterior field onto the prior's components.
                pc_fieldpost_df = pd.DataFrame(data=pca.transform(
                    scaler.transform(field_output[group_name]
                                     [fieldparam])))
                # NOTE(review): the first argument is a DataFrame here,
                # whereas the scalar case passes the updated parameter keys
                # (dkeysf) -- confirm this is what calc_kolmogorov_smirnov
                # expects.
                all_kolmogorov_smirnov[
                    group_name] = all_kolmogorov_smirnov[
                        "PCFieldParameters"].map(
                            calc_kolmogorov_smirnov(
                                pc_fieldpost_df,
                                pc_fieldprior_df,
                                pc_fieldpost_df,
                            ))
            all_kolmogorov_smirnov.set_index("PCFieldParameters",
                                             inplace=True)
            # add the field max Ks to the scalar Ks matrix
            kolmogorov_smirnov_data.loc[
                "FIELD_" + fieldparam] = all_kolmogorov_smirnov.max()
            self.reporter.publish_csv("delta_field" + fieldparam,
                                      map_calc_properties)
    # save/export the Ks matrix, active_obs, misfitval and prior data
    self.reporter.publish_csv("ks", kolmogorov_smirnov_data)
    self.reporter.publish_csv("active_obs_info", active_obs)
    self.reporter.publish_csv("misfit_obs_info", misfitval)
    self.reporter.publish_csv("prior", prior_data)