Example #1
0
    def test_join(self):
        """Join summary, GenKw, misfit and design-matrix data into one frame
        and verify selected values, index behavior and column dtypes.

        Fix: ``numpy.object`` was a deprecated alias for the builtin
        ``object`` and was removed in NumPy 1.24 — compare against
        ``object`` directly (identical behavior on older NumPy).
        """

        with ErtTestContext("python/enkf/export/export_join", self.config) as context:
            dumpDesignMatrix("DesignMatrix.txt")
            ert = context.getErt()

            summary_data = SummaryCollector.loadAllSummaryData(ert, "default_1")
            gen_kw_data = GenKwCollector.loadAllGenKwData(ert, "default_1")
            misfit = MisfitCollector.loadAllMisfitData(ert, "default_1")
            dm = DesignMatrixReader.loadDesignMatrix("DesignMatrix.txt")

            # Inner joins: only realizations present in every source survive.
            result = summary_data.join(gen_kw_data, how="inner")
            result = result.join(misfit, how="inner")
            result = result.join(dm, how="inner")

            first_date = "2010-01-10"
            last_date = "2015-06-23"

            # GenKw values are constant per realization across dates.
            self.assertFloatEqual(
                result["SNAKE_OIL_PARAM:OP1_OCTAVES"][0][first_date], 3.947766
            )
            self.assertFloatEqual(
                result["SNAKE_OIL_PARAM:OP1_OCTAVES"][24][first_date], 4.206698
            )
            self.assertFloatEqual(
                result["SNAKE_OIL_PARAM:OP1_OCTAVES"][24][last_date], 4.206698
            )

            # Design-matrix columns are likewise constant across dates.
            self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][0][first_date], 0.08)
            self.assertEqual(result["EXTRA_INT_COLUMN"][0][first_date], 125)
            self.assertEqual(result["EXTRA_STRING_COLUMN"][0][first_date], "ON")

            self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][0][last_date], 0.08)
            self.assertEqual(result["EXTRA_INT_COLUMN"][0][last_date], 125)
            self.assertEqual(result["EXTRA_STRING_COLUMN"][0][last_date], "ON")

            self.assertFloatEqual(result["EXTRA_FLOAT_COLUMN"][1][last_date], 0.07)
            self.assertEqual(result["EXTRA_INT_COLUMN"][1][last_date], 225)
            self.assertEqual(result["EXTRA_STRING_COLUMN"][1][last_date], "OFF")

            self.assertFloatEqual(result["MISFIT:FOPR"][0][last_date], 457.491003)
            self.assertFloatEqual(result["MISFIT:FOPR"][24][last_date], 1630.774198)

            self.assertFloatEqual(result["MISFIT:TOTAL"][0][first_date], 468.469969)
            self.assertFloatEqual(result["MISFIT:TOTAL"][0][last_date], 468.469969)
            self.assertFloatEqual(result["MISFIT:TOTAL"][24][last_date], 1714.662370)

            # Realization 60 is not part of the joined index.
            with self.assertRaises(KeyError):
                realization_13 = result.loc[60]

            column_count = len(result.columns)
            self.assertEqual(result.dtypes[0], numpy.float64)
            # The string column joins in as dtype 'O' (object); the int
            # column keeps int64. `object` replaces the removed numpy.object.
            self.assertEqual(result.dtypes[column_count - 1], object)
            self.assertEqual(result.dtypes[column_count - 2], numpy.int64)
Example #2
0
    def test_misfit_collector(self):
        """MisfitCollector returns expected per-realization misfit values
        for the "default_0" case, and realization 60 is absent."""
        test_area = "python/enkf/export/misfit_collector"
        with ErtTestContext(test_area, self.config) as context:
            misfit = MisfitCollector.loadAllMisfitData(context.getErt(), "default_0")

            expected = {
                "MISFIT:FOPR": {0: 798.378619, 24: 1332.219633},
                "MISFIT:TOTAL": {0: 826.651491, 24: 1431.305646},
            }
            for key, per_realization in expected.items():
                for realization, value in per_realization.items():
                    self.assertFloatEqual(misfit[key][realization], value)

            # Realization 20 is present in the index ...
            _ = misfit.loc[20]

            # ... while realization 60 is not.
            with self.assertRaises(KeyError):
                misfit.loc[60]
Example #3
0
    def test_misfit_collector(self):
        """Spot-check misfit values loaded for the "default_0" case and
        verify that realization 60 raises KeyError."""
        context_name = "python/enkf/export/misfit_collector"
        with ErtTestContext(context_name,
                            self.config) as context:
            frame = MisfitCollector.loadAllMisfitData(context.getErt(), "default_0")

            checks = [
                ("MISFIT:FOPR", 0, 737.436374),
                ("MISFIT:FOPR", 24, 1258.644538),
                ("MISFIT:TOTAL", 0, 765.709246),
                ("MISFIT:TOTAL", 24, 1357.730551),
            ]
            for column, realization, expected in checks:
                self.assertFloatEqual(frame[column][realization], expected)

            # Present realization: lookup must succeed.
            _ = frame.loc[20]

            # Absent realization: lookup must fail.
            with self.assertRaises(KeyError):
                frame.loc[60]
Example #4
0
    def test_misfit_collector(self):
        """Verify FOPR and TOTAL misfits for realizations 0 and 24 of
        "default_0", plus index membership for realizations 20 and 60."""
        with ErtTestContext("python/enkf/export/misfit_collector",
                            self.config) as context:
            collected = MisfitCollector.loadAllMisfitData(context.getErt(), "default_0")

            fopr = collected["MISFIT:FOPR"]
            total = collected["MISFIT:TOTAL"]

            self.assertFloatEqual(fopr[0], 738.735586)
            self.assertFloatEqual(fopr[24], 1260.086789)

            self.assertFloatEqual(total[0], 767.008457)
            self.assertFloatEqual(total[24], 1359.172803)

            # Realization 20 exists in the collected index.
            _ = collected.loc[20]

            # Realization 60 does not and must raise.
            with self.assertRaises(KeyError):
                collected.loc[60]
Example #5
0
def create_update_data(ert):
    """Yield one ``js.MisfitCreate`` record per observation vector.

    Each record carries the observation/response keys, the active-blob
    for the observation (when active-observation info is available), and
    the total chi-squared misfit per active realization.
    """
    fs = ert.get_current_fs()
    active_indices = MisfitCollector.createActiveList(_EnKFMain(ert), fs)

    # Mapping of observation key -> active blob; may be falsy when no
    # active-observation information exists.
    active_map = _extract_active_observations(ert)

    for vector in ert.get_observations():
        key = vector.getObservationKey()
        chi2_per_realization = {
            index: vector.getTotalChi2(fs, index) for index in active_indices
        }
        yield js.MisfitCreate(
            observation_key=key,
            response_definition_key=vector.getDataKey(),
            active=active_map[key] if active_map else None,
            realizations=chi2_per_realization,
        )
Example #6
0
def _extract_and_dump_update_data(ensemble_id, ensemble_name, rdb_api,
                                  blob_api):
    """Persist per-observation misfit data for one ensemble.

    For every observation vector: store its active-observation blob (when
    available), link the observation to its response definition, and record
    one misfit value per active realization through ``rdb_api``.
    """
    facade = ERT.enkf_facade

    fs = facade.get_current_fs()
    # Realizations with data in the current file system.
    realizations = MisfitCollector.createActiveList(ERT.ert, fs)

    # May be None when no active-observation info exists; every use below
    # is guarded on that same condition.
    active_observations = _extract_active_observations(facade)
    ensemble = rdb_api.get_ensemble_by_id(ensemble_id=ensemble_id)
    # The parent id (when present) identifies the update step that
    # produced this ensemble.
    update_id = ensemble.parent.id if ensemble.parent is not None else None

    for obs_vector in facade.get_observations():
        observation_key = obs_vector.getObservationKey()
        response_key = obs_vector.getDataKey()
        response_definition = rdb_api._get_response_definition(
            response_key, ensemble_id)

        if active_observations is not None:
            # Flush so the blob has an id before it is referenced below.
            active_blob = blob_api.add_blob(
                active_observations[observation_key])
            blob_api.flush()

        observation = rdb_api.get_observation(observation_key)
        # NOTE(review): `active_blob` is only bound when
        # `active_observations is not None`; the conditional below guards
        # on the same condition, so no unbound access can occur.
        link = rdb_api._add_observation_response_definition_link(
            observation_id=observation.id,
            response_definition_id=response_definition.id,
            active_ref=active_blob.id
            if active_observations is not None else None,
            update_id=update_id,
        )
        for realization_number in realizations:
            response = rdb_api.get_response(
                name=response_key,
                realization_index=realization_number,
                ensemble_name=ensemble_name,
            )
            # Total chi-squared misfit for this observation/realization.
            misfit_value = obs_vector.getTotalChi2(fs, realization_number)
            rdb_api._add_misfit(value=misfit_value,
                                link_id=link.id,
                                response_id=response.id)
Example #7
0
    def run(self, output_file, case_list=None, design_matrix_path=None, infer_iteration=True):
        """Export GenKW, custom-KW, design-matrix, misfit and summary data
        for one or more cases to a single CSV file.

        :param output_file: path of the CSV file to write.
        :param case_list: comma-separated case names, "*" for all cases,
            or None for the current case.
        :param design_matrix_path: optional path to a design-matrix file
            joined onto every case's data.
        :param infer_iteration: when True, derive the iteration number from
            the case name; otherwise use the case's position in the list.
        :raises UserWarning: on a missing/invalid design matrix, an unknown
            case, or a case without data.
        :return: a human-readable summary of what was exported.
        """
        cases = []

        if case_list is not None:
            if case_list.strip() == "*":
                cases = self.getAllCaseList()
            else:
                cases = case_list.split(",")

        # Fall back to the current file system's case.
        if case_list is None or len(cases) == 0:
            cases = [self.ert().getEnkfFsManager().getCurrentFileSystem().getCaseName()]

        design_matrix_data = None
        if design_matrix_path is not None:
            if not os.path.exists(design_matrix_path):
                raise UserWarning("The design matrix file does not exists!")

            if not os.path.isfile(design_matrix_path):
                raise UserWarning("The design matrix is not a file!")

            # Load once: the design matrix is identical for every case, so
            # re-reading it inside the loop below would be wasted work.
            design_matrix_data = DesignMatrixReader.loadDesignMatrix(design_matrix_path)

        data = pandas.DataFrame()

        for index, case in enumerate(cases):
            case = case.strip()

            if not self.ert().getEnkfFsManager().caseExists(case):
                raise UserWarning("The case '%s' does not exist!" % case)

            if not self.ert().getEnkfFsManager().caseHasData(case):
                raise UserWarning("The case '%s' does not have any data!" % case)

            if infer_iteration:
                iteration_number = self.inferIterationNumber(case)
            else:
                iteration_number = index

            case_data = GenKwCollector.loadAllGenKwData(self.ert(), case)

            # Outer joins keep rows even when a source lacks some realizations.
            custom_kw_data = CustomKWCollector.loadAllCustomKWData(self.ert(), case)
            if not custom_kw_data.empty:
                case_data = case_data.join(custom_kw_data, how='outer')

            if design_matrix_data is not None and not design_matrix_data.empty:
                case_data = case_data.join(design_matrix_data, how='outer')

            misfit_data = MisfitCollector.loadAllMisfitData(self.ert(), case)
            if not misfit_data.empty:
                case_data = case_data.join(misfit_data, how='outer')

            summary_data = SummaryCollector.loadAllSummaryData(self.ert(), case)
            if not summary_data.empty:
                case_data = case_data.join(summary_data, how='outer')
            else:
                # No summary data: add an empty Date level so the final
                # reorder_levels call still finds a "Date" index level.
                case_data["Date"] = None
                case_data.set_index(["Date"], append=True, inplace=True)

            case_data["Iteration"] = iteration_number
            case_data["Case"] = case
            case_data.set_index(["Case", "Iteration"], append=True, inplace=True)

            data = pandas.concat([data, case_data])

        data = data.reorder_levels(["Realization", "Iteration", "Date", "Case"])
        data.to_csv(output_file)

        export_info = "Exported %d rows and %d columns to %s." % (len(data.index), len(data.columns), output_file)
        return export_info
Example #8
0
    def run(self,
            target_name="analysis_case",
            prior_name=None,
            group_by="data_key"):
        """Perform analysis of parameters change per obs group
        prior to posterior of ahm.

        For each observation group, run a localized ministep update from
        the prior case into *target_name*, then measure how much each
        scalar (and, via PCA, each field) parameter changed using a
        Kolmogorov-Smirnov statistic. Results (KS matrix, active-obs
        ratios, misfit values, prior data) are published as CSV reports.

        :param target_name: case name that receives the updated ensemble.
        :param prior_name: prior case name; defaults via ``check_names``.
        :param group_by: how observations are grouped (e.g. "data_key").
        """
        ert = self.ert()
        facade = LibresFacade(self.ert())

        # All observation keys, indexed by enumeration order.
        obs_keys = [
            facade.get_observation_key(nr)
            for nr, _ in enumerate(facade.get_observations())
        ]
        key_map = _group_observations(facade, obs_keys, group_by)

        prior_name, target_name = check_names(
            facade.get_current_case_name(),
            prior_name,
            target_name,
        )
        # Get the prior scalar parameter distributions
        prior_data = GenKwCollector.loadAllGenKwData(ert, prior_name)
        # Abort early when the prior has no parameters or no misfit data.
        raise_if_empty(
            dataframes=[
                prior_data,
                MisfitCollector.loadAllMisfitData(ert, prior_name)
            ],
            messages=[
                "Empty prior ensemble",
                "Empty parameters set for History Matching",
            ],
        )

        # create dataframe with observations vectors (1 by 1 obs and also all_obs)
        combinations = make_obs_groups(key_map)

        field_parameters = sorted(ert.ensembleConfig().getKeylistFromImplType(
            ErtImplType.FIELD))
        scalar_parameters = sorted(ert.ensembleConfig().getKeylistFromImplType(
            ErtImplType.GEN_KW))
        # identify the set of actual parameters that was updated for now just go
        # through scalar parameters but in future if easier access to field parameter
        # updates should also include field parameters
        dkeysf = get_updated_parameters(prior_data, scalar_parameters)
        # setup dataframe for calculated data
        kolmogorov_smirnov_data, active_obs, misfitval = (
            pd.DataFrame(sorted(dkeysf), columns=["Parameters"]),
            pd.DataFrame(),
            pd.DataFrame(index=["misfit"]),
        )
        # loop over keys and calculate the KS matrix,
        # conditioning one parameter at the time.
        field_output = {}
        for group_name, obs_group in combinations.items():
            print("Processing:", group_name)

            #  Use localization to evaluate change of parameters for each observation
            with tempfile.TemporaryDirectory() as update_log_path:
                _run_ministep(
                    ert,
                    obs_group,
                    field_parameters + scalar_parameters,
                    prior_name,
                    target_name,
                    update_log_path,
                )
                # Get the active vs total observation info
                df_update_log = make_update_log_df(update_log_path)

            # Get the updated scalar parameter distributions
            self.reporter.publish_csv(
                group_name, GenKwCollector.loadAllGenKwData(ert, target_name))

            # Record "<active> active/<total>" per group.
            active_obs.at["ratio", group_name] = (
                str(count_active_observations(df_update_log)) + " active/" +
                str(len(df_update_log.index)))
            # Get misfit values
            misfitval[group_name] = [
                calc_observationsgroup_misfit(
                    group_name,
                    df_update_log,
                    MisfitCollector.loadAllMisfitData(ert, prior_name),
                )
            ]
            # Calculate Ks matrix for scalar parameters
            kolmogorov_smirnov_data[group_name] = kolmogorov_smirnov_data[
                "Parameters"].map(
                    calc_kolmogorov_smirnov(
                        dkeysf,
                        prior_data,
                        GenKwCollector.loadAllGenKwData(ert, target_name),
                    ))
            # Keep the posterior field parameters for the PCA step below.
            field_output[group_name] = _get_field_params(
                ert, facade.get_ensemble_size(), field_parameters, target_name)
        kolmogorov_smirnov_data.set_index("Parameters", inplace=True)

        # Calculate Ks matrix for Fields parameters
        if field_parameters:
            # Get grid characteristics to be able to plot field avg maps
            grid_xyzcenter = load_grid_to_dataframe(
                ert.eclConfig().get_gridfile())
            all_input_prior = _get_field_params(ert,
                                                facade.get_ensemble_size(),
                                                field_parameters, prior_name)

            for fieldparam in field_parameters:
                # Standardize, then reduce to components explaining 98% of
                # the prior field variance; KS is computed in PC space.
                scaler = StandardScaler()
                scaler.fit(all_input_prior[fieldparam])
                pca = PCA(0.98).fit(
                    pd.DataFrame(scaler.transform(
                        all_input_prior[fieldparam])))
                pc_fieldprior_df = pd.DataFrame(data=pca.transform(
                    scaler.transform(all_input_prior[fieldparam])))
                all_kolmogorov_smirnov = pd.DataFrame(
                    pc_fieldprior_df.columns.tolist(),
                    columns=["PCFieldParameters"])
                # Get the posterior Field parameters
                map_calc_properties = (grid_xyzcenter[grid_xyzcenter["KZ"] ==
                                                      1].copy().reset_index())
                for group_name in combinations.keys():
                    # Mean prior-to-posterior delta per grid cell (top layer).
                    map_calc_properties[
                        "Mean_D_" + group_name] = calc_mean_delta_grid(
                            field_output[group_name][fieldparam],
                            all_input_prior[fieldparam],
                            grid_xyzcenter,
                        )

                    # Posterior field projected into the prior's PC space.
                    pc_fieldpost_df = pd.DataFrame(data=pca.transform(
                        scaler.transform(field_output[group_name]
                                         [fieldparam])))
                    all_kolmogorov_smirnov[
                        group_name] = all_kolmogorov_smirnov[
                            "PCFieldParameters"].map(
                                calc_kolmogorov_smirnov(
                                    pc_fieldpost_df,
                                    pc_fieldprior_df,
                                    pc_fieldpost_df,
                                ))
                all_kolmogorov_smirnov.set_index("PCFieldParameters",
                                                 inplace=True)
                # add the field max Ks to the scalar Ks matrix
                kolmogorov_smirnov_data.loc[
                    "FIELD_" + fieldparam] = all_kolmogorov_smirnov.max()
                self.reporter.publish_csv("delta_field" + fieldparam,
                                          map_calc_properties)
        # save/export the Ks matrix, active_obs, misfitval and prior data
        self.reporter.publish_csv("ks", kolmogorov_smirnov_data)
        self.reporter.publish_csv("active_obs_info", active_obs)
        self.reporter.publish_csv("misfit_obs_info", misfitval)
        self.reporter.publish_csv("prior", prior_data)