def test_gen_kw_collector(self):
    """Check GEN_KW data loaded from case ``default_0``, unfiltered and key-filtered.

    Verifies a handful of known parameter values, that realization 20 can be
    looked up while realization 60 raises ``KeyError``, and that a key left
    out of the filter is absent from the filtered frame.
    """
    persistence_key = "SNAKE_OIL_PARAM:OP1_PERSISTENCE"
    offset_key = "SNAKE_OIL_PARAM:OP1_OFFSET"

    with ErtTestContext("python/enkf/export/gen_kw_collector", self.config) as context:
        ert = context.getErt()

        # Unfiltered load: spot-check known (key, realization) -> value pairs.
        all_params = GenKwCollector.loadAllGenKwData(ert, "default_0")
        expected_values = (
            (persistence_key, 0, 0.047517),
            (persistence_key, 24, 0.160907),
            (offset_key, 0, 0.054539),
            (offset_key, 12, 0.057807),
        )
        for key, realization, expected in expected_values:
            self.assertFloatEqual(all_params[key][realization], expected)

        # Row lookup must succeed for realization 20 and fail for 60.
        all_params.loc[20]
        with self.assertRaises(KeyError):
            all_params.loc[60]

        # Filtered load: only the requested keys are available.
        selected = GenKwCollector.loadAllGenKwData(
            ert, "default_0", [persistence_key, offset_key]
        )
        self.assertFloatEqual(selected[persistence_key][0], 0.047517)
        self.assertFloatEqual(selected[offset_key][0], 0.054539)

        with self.assertRaises(KeyError):
            selected["SNAKE_OIL_PARAM:OP1_DIVERGENCE_SCALE"]
def test_smoother(self):
    """Run a smoother update in the shell and check the parameter spread shrinks.

    The standard deviation of ``PERLIN_PARAM:SCALE`` in ``test_run`` must be
    strictly larger than in the updated case ``test_run_update``.
    """
    scale_key = "PERLIN_PARAM:SCALE"
    config_path = self.createTestPath("local/custom_kw/mini_config")

    with ErtShellTestContext("python/ertshell/smoother", config_path) as shell:
        print(os.getcwd())

        shell.invokeCommand("case select test_run")
        update_succeeded = shell.invokeCommand("smoother update test_run_update")
        self.assertTrue(update_succeeded)
        shell.invokeCommand("case select test_run_update")

        ert = shell.shellContext().ert()
        before = GenKwCollector.loadAllGenKwData(ert, "test_run", keys=[scale_key])
        after = GenKwCollector.loadAllGenKwData(ert, "test_run_update", keys=[scale_key])

        spread_before = before[scale_key].std()
        spread_after = after[scale_key].std()
        self.assertTrue(spread_before > spread_after)
def test_join(self):
    """Inner-join summary, GEN_KW, misfit and design-matrix data and verify the result.

    Checks known values per (realization, date), that realization 60 is not
    present, and the dtypes of the first and the last two columns.
    """
    with ErtTestContext("python/enkf/export/export_join", self.config) as context:
        dumpDesignMatrix("DesignMatrix.txt")
        ert = context.getErt()

        summary_data = SummaryCollector.loadAllSummaryData(ert, "default_1")
        gen_kw_data = GenKwCollector.loadAllGenKwData(ert, "default_1")
        misfit = MisfitCollector.loadAllMisfitData(ert, "default_1")
        dm = DesignMatrixReader.loadDesignMatrix("DesignMatrix.txt")

        result = summary_data.join(gen_kw_data, how="inner")
        result = result.join(misfit, how="inner")
        result = result.join(dm, how="inner")

        first_date = "2010-01-10"
        last_date = "2015-06-23"

        self.assertFloatEqual(
            result["SNAKE_OIL_PARAM:OP1_OCTAVES"][0][first_date], 3.947766
        )
        self.assertFloatEqual(
            result["SNAKE_OIL_PARAM:OP1_OCTAVES"][24][first_date], 4.206698
        )
        self.assertFloatEqual(
            result["SNAKE_OIL_PARAM:OP1_OCTAVES"][24][last_date], 4.206698
        )

        self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][0][first_date], 0.08)
        self.assertEqual(result["EXTRA_INT_COLUMN"][0][first_date], 125)
        self.assertEqual(result["EXTRA_STRING_COLUMN"][0][first_date], "ON")

        self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][0][last_date], 0.08)
        self.assertEqual(result["EXTRA_INT_COLUMN"][0][last_date], 125)
        self.assertEqual(result["EXTRA_STRING_COLUMN"][0][last_date], "ON")

        self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][1][last_date], 0.07)
        self.assertEqual(result["EXTRA_INT_COLUMN"][1][last_date], 225)
        self.assertEqual(result["EXTRA_STRING_COLUMN"][1][last_date], "OFF")

        self.assertFloatEqual(result["MISFIT:FOPR"][0][last_date], 457.491003)
        self.assertFloatEqual(result["MISFIT:FOPR"][24][last_date], 1630.774198)

        self.assertFloatEqual(result["MISFIT:TOTAL"][0][first_date], 468.469969)
        self.assertFloatEqual(result["MISFIT:TOTAL"][0][last_date], 468.469969)
        self.assertFloatEqual(result["MISFIT:TOTAL"][24][last_date], 1714.662370)

        # Realization 60 is not part of the joined data.
        with self.assertRaises(KeyError):
            result.loc[60]

        # `dtypes` is a Series labelled by column name, so use explicit
        # positional access instead of the deprecated integer fallback of `[]`.
        column_dtypes = result.dtypes
        self.assertEqual(column_dtypes.iloc[0], numpy.float64)
        # `numpy.object` was only an alias of the builtin `object` and has been
        # removed from NumPy; comparing against `object` is equivalent.
        self.assertEqual(column_dtypes.iloc[-1], object)
        self.assertEqual(column_dtypes.iloc[-2], numpy.int64)
def run(self, output_file, case_list=None, design_matrix_path=None, infer_iteration=True):
    """Export GEN_KW, custom KW, design matrix, misfit and summary data to CSV.

    :param output_file: destination passed to ``DataFrame.to_csv``.
    :param case_list: comma separated case names, or ``"*"`` to use
        ``self.getAllCaseList()``. When ``None`` (or when the resulting list
        is empty) the current file system's case is exported.
    :param design_matrix_path: optional path to a design matrix file that is
        joined onto every case.
    :param infer_iteration: when true the iteration number comes from
        ``self.inferIterationNumber(case)``; otherwise the position of the
        case in the list is used.
    :return: a one-line summary with the exported row and column counts.
    :raises UserWarning: if the design matrix path does not exist or is not a
        file, or if a case does not exist or has no data.
    """
    ert = self.ert()
    fs_manager = ert.getEnkfFsManager()

    cases = []
    if case_list is not None:
        if case_list.strip() == "*":
            cases = self.getAllCaseList()
        else:
            cases = case_list.split(",")

    if len(cases) == 0:
        cases = [fs_manager.getCurrentFileSystem().getCaseName()]

    if design_matrix_path is not None:
        if not os.path.exists(design_matrix_path):
            raise UserWarning("The design matrix file does not exists!")

        if not os.path.isfile(design_matrix_path):
            raise UserWarning("The design matrix is not a file!")

    # The design matrix is the same for every case: read it once, on first
    # use, instead of re-reading the file for each case.
    design_matrix_data = None

    # Collect per-case frames and concatenate once after the loop; growing a
    # DataFrame with concat inside the loop re-copies all earlier rows.
    frames = []

    for index, case in enumerate(cases):
        case = case.strip()

        if not fs_manager.caseExists(case):
            raise UserWarning("The case '%s' does not exist!" % case)

        if not fs_manager.caseHasData(case):
            raise UserWarning("The case '%s' does not have any data!" % case)

        if infer_iteration:
            iteration_number = self.inferIterationNumber(case)
        else:
            iteration_number = index

        case_data = GenKwCollector.loadAllGenKwData(ert, case)

        custom_kw_data = CustomKWCollector.loadAllCustomKWData(ert, case)
        if not custom_kw_data.empty:
            case_data = case_data.join(custom_kw_data, how='outer')

        if design_matrix_path is not None:
            if design_matrix_data is None:
                design_matrix_data = DesignMatrixReader.loadDesignMatrix(design_matrix_path)
            if not design_matrix_data.empty:
                case_data = case_data.join(design_matrix_data, how='outer')

        misfit_data = MisfitCollector.loadAllMisfitData(ert, case)
        if not misfit_data.empty:
            case_data = case_data.join(misfit_data, how='outer')

        summary_data = SummaryCollector.loadAllSummaryData(ert, case)
        if not summary_data.empty:
            case_data = case_data.join(summary_data, how='outer')
        else:
            # Without summary data there is no "Date" level; add an empty one
            # so every case ends up with the same index levels.
            case_data["Date"] = None
            case_data.set_index(["Date"], append=True, inplace=True)

        case_data["Iteration"] = iteration_number
        case_data["Case"] = case
        case_data.set_index(["Case", "Iteration"], append=True, inplace=True)

        frames.append(case_data)

    data = pandas.concat(frames)
    data = data.reorder_levels(["Realization", "Iteration", "Date", "Case"])
    data.to_csv(output_file)

    export_info = "Exported %d rows and %d columns to %s." % (
        len(data.index),
        len(data.columns),
        output_file,
    )
    return export_info
def gatherGenKwData(ert, case, key):
    """Load a single GEN_KW key for ``case`` and drop its missing values.

    Only ``key`` is requested from ``GenKwCollector.loadAllGenKwData``; the
    column of that name is then returned with NaN entries removed.

    NOTE(review): this was previously annotated ``:rtype: pandas.DataFrame``;
    selecting one column normally yields a ``pandas.Series`` - confirm.
    """
    loaded = GenKwCollector.loadAllGenKwData(ert, case, [key])
    column = loaded[key]
    return column.dropna()
def test_gen_kw_collector(self):
    """Check GEN_KW loading for ``default_0``: all keys, a key subset, one realization.

    Besides known parameter values this verifies that realization 60 raises
    ``KeyError`` on row lookup, that a key outside the filter is absent, that
    ``realization_index`` limits the frame to that single realization, and
    that a realization index of 150 raises ``IndexError``.
    """
    case_name = "default_0"
    persistence_key = "SNAKE_OIL_PARAM:OP1_PERSISTENCE"
    offset_key = "SNAKE_OIL_PARAM:OP1_OFFSET"

    with ErtTestContext("python/enkf/export/gen_kw_collector", self.config) as context:
        ert = context.getErt()

        # --- all keys, all realizations ---
        frame = GenKwCollector.loadAllGenKwData(ert, case_name)
        known_values = (
            (persistence_key, 0, 0.047517),
            (persistence_key, 24, 0.160907),
            (offset_key, 0, 0.054539),
            (offset_key, 12, 0.057807),
        )
        for key, realization, expected in known_values:
            self.assertFloatEqual(frame[key][realization], expected)

        frame.loc[20]  # lookup of an existing realization must not raise
        with self.assertRaises(KeyError):
            frame.loc[60]

        # --- key subset ---
        frame = GenKwCollector.loadAllGenKwData(
            ert,
            case_name,
            [persistence_key, offset_key],
        )
        self.assertFloatEqual(frame[persistence_key][0], 0.047517)
        self.assertFloatEqual(frame[offset_key][0], 0.054539)

        with self.assertRaises(KeyError):
            frame["SNAKE_OIL_PARAM:OP1_DIVERGENCE_SCALE"]

        # --- single realization ---
        realization_index = 10
        frame = GenKwCollector.loadAllGenKwData(
            ert,
            case_name,
            [persistence_key],
            realization_index=realization_index,
        )

        assert frame.index == [realization_index]
        assert len(frame.index) == 1
        assert list(frame.columns) == [persistence_key]
        self.assertFloatEqual(frame[persistence_key][10], 0.282923)

        # --- realization index outside the ensemble ---
        non_existing_realization_index = 150
        with pytest.raises(IndexError):
            GenKwCollector.loadAllGenKwData(
                ert,
                case_name,
                [persistence_key],
                realization_index=non_existing_realization_index,
            )
def run(self, target_name="analysis_case", prior_name=None, group_by="data_key"):
    """Perform analysis of parameters change per obs group prior to posterior of ahm

    For every observation group an update step is run from the prior case into
    the target case, and the resulting scalar (GEN_KW) parameters are compared
    with the prior via ``calc_kolmogorov_smirnov``. Field parameters, if any,
    are compared in a PCA-transformed space afterwards.

    Results are published through ``self.reporter.publish_csv``:

    * one CSV per observation group with the target case's GEN_KW data,
    * ``delta_field<param>`` per field parameter,
    * ``ks``, ``active_obs_info``, ``misfit_obs_info`` and ``prior``.

    :param target_name: name of the case receiving the update; passed through
        ``check_names`` together with ``prior_name``.
    :param prior_name: name of the prior case; resolved by ``check_names``
        using the current case name.
    :param group_by: forwarded to ``_group_observations`` to decide how
        observation keys are grouped.
    """
    ert = self.ert()
    facade = LibresFacade(self.ert())

    obs_keys = [
        facade.get_observation_key(nr)
        for nr, _ in enumerate(facade.get_observations())
    ]
    key_map = _group_observations(facade, obs_keys, group_by)

    prior_name, target_name = check_names(
        facade.get_current_case_name(),
        prior_name,
        target_name,
    )
    # Get the prior scalar parameter distributions
    prior_data = GenKwCollector.loadAllGenKwData(ert, prior_name)
    # Validate the prior GEN_KW data and the prior misfit data; each dataframe
    # is paired with a message in the same position.
    raise_if_empty(
        dataframes=[
            prior_data,
            MisfitCollector.loadAllMisfitData(ert, prior_name)
        ],
        messages=[
            "Empty prior ensemble",
            "Empty parameters set for History Matching",
        ],
    )

    # create dataframe with observations vectors (1 by 1 obs and also all_obs)
    combinations = make_obs_groups(key_map)

    field_parameters = sorted(ert.ensembleConfig().getKeylistFromImplType(
        ErtImplType.FIELD))
    scalar_parameters = sorted(ert.ensembleConfig().getKeylistFromImplType(
        ErtImplType.GEN_KW))
    # identify the set of actual parameters that was updated for now just go
    # through scalar parameters but in future if easier access to field parameter
    # updates should also include field parameters
    dkeysf = get_updated_parameters(prior_data, scalar_parameters)
    # setup dataframe for calculated data:
    #   kolmogorov_smirnov_data - one row per updated scalar parameter
    #   active_obs              - filled with an "<n> active/<total>" string per group
    #   misfitval               - single "misfit" row, one column per group
    kolmogorov_smirnov_data, active_obs, misfitval = (
        pd.DataFrame(sorted(dkeysf), columns=["Parameters"]),
        pd.DataFrame(),
        pd.DataFrame(index=["misfit"]),
    )
    # loop over keys and calculate the KS matrix,
    # conditioning one parameter at the time.
    # field_output maps group name -> field parameters read from the target case
    # right after that group's update.
    field_output = {}
    for group_name, obs_group in combinations.items():
        print("Processing:", group_name)

        # Use localization to evaluate change of parameters for each observation
        # The update log is written to a temporary directory and parsed before
        # the directory is removed.
        with tempfile.TemporaryDirectory() as update_log_path:
            _run_ministep(
                ert,
                obs_group,
                field_parameters + scalar_parameters,
                prior_name,
                target_name,
                update_log_path,
            )
            # Get the active vs total observation info
            df_update_log = make_update_log_df(update_log_path)

        # Get the updated scalar parameter distributions
        self.reporter.publish_csv(
            group_name, GenKwCollector.loadAllGenKwData(ert, target_name))

        active_obs.at["ratio", group_name] = (
            str(count_active_observations(df_update_log)) + " active/" +
            str(len(df_update_log.index)))
        # Get misfit values
        # NOTE(review): the prior misfit data is reloaded for every group;
        # presumably it does not change between iterations - confirm before
        # hoisting it out of the loop.
        misfitval[group_name] = [
            calc_observationsgroup_misfit(
                group_name,
                df_update_log,
                MisfitCollector.loadAllMisfitData(ert, prior_name),
            )
        ]
        # Calculate Ks matrix for scalar parameters
        # NOTE(review): the target GEN_KW data is loaded a second time here
        # (first load is for publish_csv above).
        kolmogorov_smirnov_data[group_name] = kolmogorov_smirnov_data[
            "Parameters"].map(
                calc_kolmogorov_smirnov(
                    dkeysf,
                    prior_data,
                    GenKwCollector.loadAllGenKwData(ert, target_name),
                ))
        # Keep this group's field parameters; the target case is reused by the
        # next group's update.
        field_output[group_name] = _get_field_params(
            ert, facade.get_ensemble_size(), field_parameters, target_name)
    kolmogorov_smirnov_data.set_index("Parameters", inplace=True)

    # Calculate Ks matrix for Fields parameters
    if field_parameters:
        # Get grid characteristics to be able to plot field avg maps
        grid_xyzcenter = load_grid_to_dataframe(
            ert.eclConfig().get_gridfile())
        all_input_prior = _get_field_params(ert, facade.get_ensemble_size(),
                                            field_parameters, prior_name)

        for fieldparam in field_parameters:
            # Scaler and PCA are fitted on the prior only; the same transforms
            # are applied to every group's posterior below.
            # NOTE(review): presumably scikit-learn's StandardScaler/PCA, where
            # a float n_components selects components by explained variance -
            # confirm against the file's imports.
            scaler = StandardScaler()
            scaler.fit(all_input_prior[fieldparam])
            pca = PCA(0.98).fit(
                pd.DataFrame(scaler.transform(
                    all_input_prior[fieldparam])))
            pc_fieldprior_df = pd.DataFrame(data=pca.transform(
                scaler.transform(all_input_prior[fieldparam])))
            # One row per principal-component column of the transformed prior.
            all_kolmogorov_smirnov = pd.DataFrame(
                pc_fieldprior_df.columns.tolist(),
                columns=["PCFieldParameters"])
            # Get the posterior Field parameters
            # Rows of the grid dataframe where "KZ" equals 1, with a fresh index.
            map_calc_properties = (grid_xyzcenter[grid_xyzcenter["KZ"] ==
                                                  1].copy().reset_index())
            for group_name in combinations.keys():
                map_calc_properties[
                    "Mean_D_" + group_name] = calc_mean_delta_grid(
                        field_output[group_name][fieldparam],
                        all_input_prior[fieldparam],
                        grid_xyzcenter,
                    )
                pc_fieldpost_df = pd.DataFrame(data=pca.transform(
                    scaler.transform(field_output[group_name]
                                     [fieldparam])))
                all_kolmogorov_smirnov[
                    group_name] = all_kolmogorov_smirnov[
                        "PCFieldParameters"].map(
                            calc_kolmogorov_smirnov(
                                pc_fieldpost_df,
                                pc_fieldprior_df,
                                pc_fieldpost_df,
                            ))
            all_kolmogorov_smirnov.set_index("PCFieldParameters",
                                             inplace=True)
            # add the field max Ks to the scalar Ks matrix
            kolmogorov_smirnov_data.loc[
                "FIELD_" + fieldparam] = all_kolmogorov_smirnov.max()
            self.reporter.publish_csv("delta_field" + fieldparam,
                                      map_calc_properties)
    # save/export the Ks matrix, active_obs, misfitval and prior data
    self.reporter.publish_csv("ks", kolmogorov_smirnov_data)
    self.reporter.publish_csv("active_obs_info", active_obs)
    self.reporter.publish_csv("misfit_obs_info", misfitval)
    self.reporter.publish_csv("prior", prior_data)