Example #1
0
def dump_to_new_storage(reference=None, rdb_session=None, blob_session=None):

    start_time = time.time()
    logger.debug("Starting extraction...")

    if rdb_session is None:
        rdb_session = ERT_STORAGE.RdbSession()

    if blob_session is None:
        blob_session = ERT_STORAGE.BlobSession()

    rdb_api = RdbApi(session=rdb_session)
    blob_api = BlobApi(session=blob_session)

    try:
        priors = _extract_and_dump_priors(
            rdb_api=rdb_api) if reference is None else []

        ensemble = _create_ensemble(rdb_api,
                                    reference=reference,
                                    priors=priors)
        _extract_and_dump_observations(rdb_api=rdb_api, blob_api=blob_api)

        _extract_and_dump_parameters(
            rdb_api=rdb_api,
            blob_api=blob_api,
            ensemble_name=ensemble.name,
            priors=priors,
        )
        _extract_and_dump_responses(rdb_api=rdb_api,
                                    blob_api=blob_api,
                                    ensemble_name=ensemble.name)
        _extract_and_dump_update_data(ensemble.id, ensemble.name, rdb_api,
                                      blob_api)

        rdb_session.commit()
        blob_session.commit()
        ensemble_name = ensemble.name

        end_time = time.time()
        logger.debug(
            "Extraction done... (Took {:.2f} seconds)".format(end_time -
                                                              start_time))
        logger.debug("All ensembles in database: {}".format(", ".join(
            [ensemble.name for ensemble in rdb_api.get_all_ensembles()])))

    except:
        rdb_session.rollback()
        blob_session.rollback()
        raise
    finally:
        rdb_session.close()
        blob_session.close()

    return ensemble_name
Example #2
0
def dump_to_new_storage(reference=None,
                        rdb_connection=None,
                        blob_connection=None):

    start_time = time.time()
    logger.debug("Starting extraction...")
    if rdb_connection is None:
        rdb_url = "sqlite:///entities.db"
        rdb_connection = connections.get_rdb_connection(rdb_url)

    rdb_api = RdbApi(connection=rdb_connection)

    if blob_connection is None:
        blob_url = "sqlite:///blobs.db"
        blob_connection = connections.get_blob_connection(blob_url)

    blob_api = BlobApi(connection=blob_connection)

    with rdb_api, blob_api:
        priors = _extract_and_dump_priors(
            rdb_api=rdb_api) if reference is None else []

        ensemble = _create_ensemble(rdb_api,
                                    reference=reference,
                                    priors=priors)
        _extract_and_dump_observations(rdb_api=rdb_api, blob_api=blob_api)

        _extract_and_dump_parameters(
            rdb_api=rdb_api,
            blob_api=blob_api,
            ensemble_name=ensemble.name,
            priors=priors,
        )
        _extract_and_dump_responses(rdb_api=rdb_api,
                                    blob_api=blob_api,
                                    ensemble_name=ensemble.name)
        _extract_and_dump_update_data(ensemble.id, ensemble.name, rdb_api,
                                      blob_api)
        blob_api.commit()
        rdb_api.commit()
        ensemble_name = ensemble.name

        end_time = time.time()
        logger.debug(
            "Extraction done... (Took {:.2f} seconds)".format(end_time -
                                                              start_time))
        logger.debug("All ensembles in database: {}".format(", ".join(
            [ensemble.name for ensemble in rdb_api.get_all_ensembles()])))

    rdb_connection.close()
    blob_connection.close()

    return ensemble_name
Example #3
0
class StorageApi:
    def __init__(self, session):
        self._rdb_api = RdbApi(session)

    def _ensemble_minimal(self, ensemble):
        if ensemble is None:
            return None
        return {
            "name":
            ensemble.name,
            "time_created":
            ensemble.time_created.isoformat(),
            "ensemble_ref":
            ensemble.id,
            "parent": {
                "ensemble_ref": ensemble.parent.ensemble_reference.id,
                "name": ensemble.parent.ensemble_reference.name,
            } if ensemble.parent is not None else {},
            "children": [{
                "ensemble_ref": child.ensemble_result.id,
                "name": child.ensemble_result.name,
            } for child in ensemble.children],
        }

    def get_ensembles(self, filter=None):
        data = [
            self._ensemble_minimal(ensemble)
            for ensemble in self._rdb_api.get_all_ensembles()
        ]

        return {"ensembles": data}

    def get_realization(self, ensemble_id, realization_idx, filter):
        realization = self._rdb_api.get_realization_by_realization_idx(
            ensemble_id=ensemble_id, realization_idx=realization_idx)

        if realization is None:
            return None

        response_definitions = self._rdb_api.get_response_definitions_by_ensemble_id(
            ensemble_id=ensemble_id)
        responses = [{
            "name":
            resp_def.name,
            "response":
            self._rdb_api.get_response_by_realization_id(
                response_definition_id=resp_def.id,
                realization_id=realization.id,
            ),
        } for resp_def in response_definitions]

        parameter_definitions = self._rdb_api.get_parameter_definitions_by_ensemble_id(
            ensemble_id=ensemble_id)
        parameters = [{
            "name":
            param_def.name,
            "parameter":
            self._rdb_api.get_parameter_by_realization_id(
                parameter_definition_id=param_def.id,
                realization_id=realization.id,
            ),
        } for param_def in parameter_definitions]

        return_schema = {
            "name":
            realization.index,
            "responses": [{
                "name": res["name"],
                "data": res["response"].values
            } for res in responses],
            "parameters": [{
                "name": par["name"],
                "data": par["parameter"].value
            } for par in parameters],
        }

        return return_schema

    def _calculate_misfit(self, obs_value, response_values, obs_stds,
                          obs_data_indexes, obs_index):
        observation_std = obs_stds[obs_index]
        response_index = obs_data_indexes[obs_index]
        response_value = response_values[response_index]
        difference = response_value - obs_value
        misfit = (difference / observation_std)**2
        sign = difference > 0

        return {"value": misfit, "sign": sign, "obs_index": obs_index}

    def get_response(self, ensemble_id, response_name, filter):
        bundle = self._rdb_api.get_response_bundle(response_name=response_name,
                                                   ensemble_id=ensemble_id)
        if bundle is None:
            return None

        observation_links = bundle.observation_links
        responses = bundle.responses
        univariate_misfits = {}
        for resp in responses:
            resp_values = list(resp.values)
            univariate_misfits[resp.realization.index] = {}
            for link in observation_links:
                observation = link.observation
                obs_values = list(observation.values)
                obs_stds = list(observation.errors)
                obs_data_indexes = list(observation.data_indices)
                misfits = []
                for obs_index, obs_value in enumerate(obs_values):
                    misfits.append(
                        self._calculate_misfit(
                            obs_value,
                            resp_values,
                            obs_stds,
                            obs_data_indexes,
                            obs_index,
                        ))
                univariate_misfits[resp.realization.index][
                    observation.name] = misfits

        return_schema = {
            "name":
            response_name,
            "ensemble_id":
            ensemble_id,
            "realizations": [{
                "name": resp.realization.index,
                "realization_ref": resp.realization.index,
                "data": resp.values,
                "summarized_misfits": {
                    misfit.observation_response_definition_link.observation.
                    name: misfit.value
                    for misfit in resp.misfits
                },
                "univariate_misfits": {
                    obs_name: misfits
                    for obs_name, misfits in univariate_misfits[
                        resp.realization.index].items()
                },
            } for resp in responses],
            "axis": {
                "data": bundle.indices
            },
        }
        if len(observation_links) > 0:
            return_schema["observations"] = [
                self._obs_to_json(link.observation, link.active)
                for link in observation_links
            ]

        return return_schema

    def get_response_data(self, ensemble_id, response_name):
        bundle = self._rdb_api.get_response_bundle(response_name=response_name,
                                                   ensemble_id=ensemble_id)
        if bundle is None:
            return None
        responses = bundle.responses

        ids = [resp.values for resp in responses]
        return ids

    def get_observation(self, name):
        obs = self._rdb_api.get_observation(name)
        return None if obs is None else self._obs_to_json(obs)

    def get_observation_attributes(self, name):
        attrs = self._rdb_api.get_observation_attributes(name)
        return None if attrs is None else {"attributes": attrs}

    def get_observation_attribute(self, name, attribute):
        attr = self._rdb_api.get_observation_attribute(name, attribute)
        return None if attr is None else {"attributes": {attribute: attr}}

    def set_observation_attribute(self, name, attribute, value):
        obs = self._rdb_api.add_observation_attribute(name, attribute, value)
        if obs is None:
            return None
        return self._obs_to_json(obs)

    def get_ensemble(self, ensemble_id):
        ens = self._rdb_api.get_ensemble_by_id(ensemble_id)

        if ens is None:
            return None

        return_schema = self._ensemble_minimal(ens)
        return_schema.update({
            "realizations": [{
                "name": real.index,
                "realization_ref": real.index
            } for real in self._rdb_api.get_realizations_by_ensemble_id(
                ensemble_id)],
            "responses":
            [{
                "name": resp.name,
                "response_ref": resp.name
            }
             for resp in self._rdb_api.get_response_definitions_by_ensemble_id(
                 ensemble_id)],
            "parameters": [
                self._parameter_minimal(
                    name=par.name,
                    group=par.group,
                    prior=par.prior,
                    parameter_def_id=par.id,
                ) for par in self._rdb_api.
                get_parameter_definitions_by_ensemble_id(ensemble_id)
            ],
        })
        return return_schema

    def _obs_to_json(self, obs, active=None):
        data = {
            "name": obs.name,
            "data": {
                "values": {
                    "data": obs.values
                },
                "std": {
                    "data": obs.errors
                },
                "data_indexes": {
                    "data": obs.data_indices
                },
                "key_indexes": {
                    "data": obs.key_indices
                },
            },
        }
        if active is not None:
            data["data"]["active_mask"] = {"data": active}

        attrs = obs.get_attributes()
        if len(attrs) > 0:
            data["attributes"] = attrs

        return data

    def _parameter_minimal(self, name, group, prior, parameter_def_id):
        return {
            "key": name,
            "group": group,
            "parameter_ref": parameter_def_id,
            "prior": {
                "function": prior.function,
                "parameter_names": prior.parameter_names,
                "parameter_values": prior.parameter_values,
            } if prior is not None else {},
        }

    def get_parameter(self, ensemble_id, parameter_def_id):
        bundle = self._rdb_api.get_parameter_bundle(
            parameter_def_id=parameter_def_id, ensemble_id=ensemble_id)
        if bundle is None:
            return None

        return_schema = self._parameter_minimal(
            name=bundle.name,
            group=bundle.group,
            prior=bundle.prior,
            parameter_def_id=parameter_def_id,
        )

        return_schema["parameter_realizations"] = [{
            "name": param.realization.index,
            "data": param.value,
            "realization": {
                "realization_ref": param.realization.index
            },
        } for param in bundle.parameters]
        return return_schema

    def get_parameter_data(self, ensemble_id, parameter_def_id):
        bundle = self._rdb_api.get_parameter_bundle(
            parameter_def_id=parameter_def_id, ensemble_id=ensemble_id)
        if bundle is None:
            return None

        ids = [param.value for param in bundle.parameters]
        return ids